**Create DB on AWS**

In [None]:
import psycopg2
import pandas as pd
from psycopg2 import sql

# Connect to the server's default "postgres" database; the target database
# may not exist yet, so it cannot be used for this connection.
conn = psycopg2.connect(
    host="",
    dbname="postgres",
    user="postgres",
    password="",
    port=5432,
    sslmode="require",  # same TLS requirement as the other cells in this notebook
)

try:
    # CREATE DATABASE cannot run inside a transaction block, so every
    # statement on this connection must be committed immediately.
    conn.autocommit = True

    with conn.cursor() as cur:
        # Only create the database when it is missing, so re-running this
        # cell does not fail with "database already exists".
        cur.execute("SELECT 1 FROM pg_database WHERE datname = %s", ("telco_churn",))
        if cur.fetchone() is None:
            cur.execute("CREATE DATABASE telco_churn;")
finally:
    # Release the connection even if one of the statements above raised.
    conn.close()


**Create Table on DB**

In [None]:
import psycopg2
from psycopg2 import sql  # needed below for safe quoting of the database name

# Hardcoded credentials (as requested)
HOST = ""
PORT = 5432
USER = "postgres"
PASSWORD = ""
MASTER_DB = "postgres"   # connect here to create the database
DB = "telco_churn"

# 1) Create DB if missing
conn = psycopg2.connect(host=HOST, port=PORT, dbname=MASTER_DB, user=USER, password=PASSWORD, sslmode="require")
try:
    # CREATE DATABASE cannot run inside a transaction block.
    conn.autocommit = True
    with conn.cursor() as cur:
        cur.execute("SELECT 1 FROM pg_database WHERE datname = %s", (DB,))
        if cur.fetchone() is None:
            # Identifiers cannot be passed as query parameters, so the name is
            # quoted with psycopg2.sql instead of string formatting.
            cur.execute(sql.SQL("CREATE DATABASE {}").format(sql.Identifier(DB)))
finally:
    # Close the connection even when a statement above fails.
    conn.close()

# 2) Create table in the created DB
conn = psycopg2.connect(host=HOST, port=PORT, dbname=DB, user=USER, password=PASSWORD, sslmode="require")
try:
    with conn.cursor() as cur:
        # IF NOT EXISTS keeps this step safe to re-run.
        cur.execute("""
CREATE TABLE IF NOT EXISTS public.customers (
    customer_id        TEXT PRIMARY KEY,
    gender             TEXT,
    senior_citizen     BOOLEAN,
    partner            BOOLEAN,
    dependents         BOOLEAN,
    tenure             INTEGER,
    phone_service      BOOLEAN,
    multiple_lines     TEXT,
    internet_service   TEXT,
    online_security    TEXT,
    online_backup      TEXT,
    device_protection  TEXT,
    tech_support       TEXT,
    streaming_tv       TEXT,
    streaming_movies   TEXT,
    contract            TEXT,
    paperless_billing   BOOLEAN,
    payment_method      TEXT,
    monthly_charges     NUMERIC(10,2),
    total_charges       NUMERIC(10,2),
    churn               BOOLEAN
);
""")
    # This connection is not in autocommit mode, so the DDL must be committed.
    conn.commit()
finally:
    conn.close()
print("Done.")

Done.


**Check Table columns on AWS DB**

In [15]:
# Connect to the database.
# HOST, PORT, DB, USER, PASSWORD and psycopg2 are not defined in this cell;
# they must already exist in the kernel from an earlier cell.
conn = psycopg2.connect(
    host=HOST,
    port=PORT,
    dbname=DB,
    user=USER,
    password=PASSWORD,
    sslmode="require"
)

table_name = 'customers'
print(f"Schema for table '{table_name}':")

try:
    with conn.cursor() as cur:
        # The table name is passed as a query parameter rather than being
        # formatted into the SQL text.
        cur.execute("""
    SELECT column_name, data_type, is_nullable
    FROM information_schema.columns
    WHERE table_schema = 'public'
      AND table_name = %s
    ORDER BY ordinal_position;
""", (table_name,))
        columns = cur.fetchall()
finally:
    # Always release the connection, even if the query fails.
    conn.close()

if not columns:
    # No rows means no such table is visible in the public schema.
    print(f"  (no columns found - does public.{table_name} exist?)")

for column_name, data_type, is_nullable in columns:
    print(f"  {column_name} | {data_type} | nullable: {is_nullable}")


Schema for table 'customers':
  customer_id | text | nullable: NO
  gender | text | nullable: YES
  senior_citizen | boolean | nullable: YES
  partner | boolean | nullable: YES
  dependents | boolean | nullable: YES
  tenure | integer | nullable: YES
  phone_service | boolean | nullable: YES
  multiple_lines | text | nullable: YES
  internet_service | text | nullable: YES
  online_security | text | nullable: YES
  online_backup | text | nullable: YES
  device_protection | text | nullable: YES
  tech_support | text | nullable: YES
  streaming_tv | text | nullable: YES
  streaming_movies | text | nullable: YES
  contract | text | nullable: YES
  paperless_billing | boolean | nullable: YES
  payment_method | text | nullable: YES
  monthly_charges | numeric | nullable: YES
  total_charges | numeric | nullable: YES
  churn | boolean | nullable: YES


**Insert CSV file into AWS DB Table**

In [None]:
from urllib.parse import quote_plus

import pandas as pd
from sqlalchemy import create_engine

# Database connection parameters (defined here so this cell does not depend on
# variables left in the kernel by earlier cells)
DB_NAME = 'telco_churn'
DB_USER = 'postgres'
DB_PASSWORD = ''
RDS_HOST = ''
RDS_PORT = 5432
TABLE_NAME = 'customers'

# Construct the database connection string.
# User and password are percent-encoded so that characters such as '@', ':'
# or '/' in a credential are not misread as URL delimiters.
DATABASE_URL = (
    f"postgresql://{quote_plus(DB_USER)}:{quote_plus(DB_PASSWORD)}"
    f"@{RDS_HOST}:{RDS_PORT}/{DB_NAME}"
)

# Create a SQLAlchemy engine; require TLS like the psycopg2 cells in this notebook
engine = create_engine(DATABASE_URL, connect_args={"sslmode": "require"})

# Path to your CSV file
csv_file_path = 'ref_data.csv'

try:
    # Read the CSV file into a pandas DataFrame
    df = pd.read_csv(csv_file_path)

    # Rename DataFrame columns to match the PostgreSQL table schema
    df = df.rename(columns={
        'customerID': 'customer_id',
        'SeniorCitizen': 'senior_citizen',
        'Partner': 'partner',
        'Dependents': 'dependents',
        'PhoneService': 'phone_service',
        'PaperlessBilling': 'paperless_billing',
        'MonthlyCharges': 'monthly_charges',
        'TotalCharges': 'total_charges',
        'Churn': 'churn',
        'MultipleLines': 'multiple_lines',
        'InternetService': 'internet_service',
        'OnlineSecurity': 'online_security',
        'OnlineBackup': 'online_backup',
        'DeviceProtection': 'device_protection',
        'TechSupport': 'tech_support',
        'StreamingTV': 'streaming_tv',
        'StreamingMovies': 'streaming_movies',
        'Contract': 'contract',
        'PaymentMethod': 'payment_method'
    })

    # Convert 'Yes'/'No' strings (and 1/0 flags) to boolean for relevant columns.
    # Values that are not keys of the mapping become NaN.
    boolean_cols = ['senior_citizen', 'partner', 'dependents', 'phone_service', 'paperless_billing', 'churn']
    for col in boolean_cols:
        # Ensure the column exists before trying to convert
        if col in df.columns:
            df[col] = df[col].map({'Yes': True, 'No': False, 1: True, 0: False, '1':True, '0':False})

    # Convert 'total_charges' to numeric, coercing unparseable values to NaN,
    # then replace those NaN values with 0
    df['total_charges'] = pd.to_numeric(df['total_charges'], errors='coerce').fillna(0)

    # Upload the DataFrame to the PostgreSQL table
    # if_exists='append' will add rows to the existing table, preserving the schema
    # index=False prevents writing the DataFrame index as a column in the DB
    df.to_sql(TABLE_NAME, engine, if_exists='append', index=False)

    print(f"Data from '{csv_file_path}' successfully uploaded to table '{TABLE_NAME}' in '{DB_NAME}'.")

except FileNotFoundError:
    print(f"Error: The file '{csv_file_path}' was not found. Please ensure it's uploaded to your Colab environment.")
except Exception as e:
    print(f"An error occurred during database upload: {e}")
finally:
    # Return pooled connections to the server whether or not the upload succeeded.
    engine.dispose()


Data from 'ref_data.csv' successfully uploaded to table 'customers' in 'telco_churn'.


**Some queries**

In [None]:
import psycopg2
import pandas as pd

# RDS connection parameters (redefined here so the cell does not rely on
# variables left in the kernel by earlier cells)
HOST = ""
PORT = 5432
DB = "telco_churn"
USER = "postgres"
PASSWORD = ""

# Connect to the database
conn = psycopg2.connect(
    host=HOST,
    port=PORT,
    dbname=DB,
    user=USER,
    password=PASSWORD,
    sslmode="require"
)

try:
    with conn.cursor() as cur:
        # Query 1: Select a few customers
        # NOTE: there is no ORDER BY, so which five rows come back is up to PostgreSQL.
        print("\n--- First 5 customers ---")
        cur.execute("SELECT * FROM customers LIMIT 5;")
        # cur.description holds one entry per result column; element 0 is its name.
        columns = [desc[0] for desc in cur.description]
        rows = cur.fetchall()
        df_customers = pd.DataFrame(rows, columns=columns)
        display(df_customers)

        # Query 2: Count all customers
        print("\n--- Total customer count ---")
        cur.execute("SELECT COUNT(*) FROM customers;")
        count = cur.fetchone()[0]
        print(f"Total number of customers: {count}")
finally:
    # Close the connection even if a query or the display call raises.
    conn.close()


--- First 5 customers ---


Unnamed: 0,customer_id,gender,senior_citizen,partner,dependents,tenure,phone_service,multiple_lines,internet_service,online_security,...,device_protection,tech_support,streaming_tv,streaming_movies,contract,paperless_billing,payment_method,monthly_charges,total_charges,churn
0,7590-VHVEG,Female,False,True,False,1,False,No phone service,DSL,No,...,No,No,No,No,Month-to-month,True,Electronic check,29.85,29.85,False
1,5575-GNVDE,Male,False,False,False,34,True,No,DSL,Yes,...,Yes,No,No,No,One year,False,Mailed check,56.95,1889.5,False
2,3668-QPYBK,Male,False,False,False,2,True,No,DSL,Yes,...,No,No,No,No,Month-to-month,True,Mailed check,53.85,108.15,True
3,7795-CFOCW,Male,False,False,False,45,False,No phone service,DSL,Yes,...,Yes,Yes,No,No,One year,False,Bank transfer (automatic),42.3,1840.75,False
4,9237-HQITU,Female,False,False,False,2,True,No,Fiber optic,No,...,No,No,No,No,Month-to-month,True,Electronic check,70.7,151.65,True



--- Total customer count ---
Total number of customers: 7043
