In [1]:
#pip install pandas pymongo sqlalchemy streamlit

In [2]:
import pandas as pd

# Load the dataset.
# The path is written as a raw string because it contains Windows backslashes:
# in a normal string "\C" and "\c" are invalid escape sequences, which newer
# Python versions warn about. The runtime value of the path is unchanged.
data_url = r'C:\Courses\census_2011.csv'  # Change to your dataset path
df = pd.read_csv(data_url)

# Task 1: Rename Columns

In [3]:
# Mapping from the original CSV headers to the shorter column names used by the
# later cells. Headers that are not listed here keep their original name.
# NOTE(review): 'Household_with_ammunities' looks like a misspelling of
# "amenities"; it is kept as-is because it is a column name that other code may use.
COLUMN_RENAMES = {
    # Identification
    'State name': 'State/UT',
    'District name': 'District',
    # Literacy
    'Male_Literate': 'Literate_Male',
    'Female_Literate': 'Literate_Female',
    # Households
    'Rural_Households': 'Households_Rural',
    'Urban_Households': 'Households_Urban',
    # Age groups
    'Age_Group_0_29': 'Young_and_Adult',
    'Age_Group_30_49': 'Middle_Aged',
    'Age_Group_50': 'Senior_Citizen',
    'Age not stated': 'Age_Not_Stated',
    # Long amenity / sanitation / water headers
    'Households_with_TV_Computer_Laptop_Telephone_mobile_phone_and_Scooter_Car': 'Household_with_ammunities',
    'Type_of_latrine_facility_Night_soil_disposed_into_open_drain_Households': 'Type_of_latrine_facility_Night',
    'Type_of_latrine_facility_Flush_pour_flush_latrine_connected_to_other_system_Households': 'Type_of_latrine_facility_Flush',
    'Not_having_latrine_facility_within_the_premises_Alternative_source_Open_Households': 'Not_having_latrine_facility',
    'Main_source_of_drinking_water_Handpump_Tubewell_Borewell_Households': 'Main_source_of_drinking_water_Handpump',
    'Main_source_of_drinking_water_Other_sources_Spring_River_Canal_Tank_Pond_Lake_Other_sources__Households': 'Main_source_of_drinking_water_Other',
}

# Applied in place so that `df` keeps referring to the same DataFrame object.
df.rename(columns=COLUMN_RENAMES, inplace=True)


# Task 2: Standardize State/UT Names

In [4]:
def standardize_state_name(name):
    """Return a State/UT name with every word capitalised except "and".

    The word "and" is matched case-insensitively, so "JAMMU AND KASHMIR" and
    "Jammu And Kashmir" both become "Jammu and Kashmir". Runs of whitespace
    are collapsed to single spaces and leading/trailing whitespace is dropped.

    Parameters
    ----------
    name : str
        Raw state or union-territory name.

    Returns
    -------
    str
        The normalised name. Non-string values (for example NaN/None from a
        missing cell) are returned unchanged instead of raising.
    """
    if not isinstance(name, str):
        # Missing cells arrive as float NaN / None and have no .split().
        return name
    return ' '.join(
        # Compare in lower case: the source data may be upper- or title-cased.
        word.lower() if word.lower() == 'and' else word.capitalize()
        for word in name.split()
    )

# Normalise every entry of the State/UT column, one value at a time.
df['State/UT'] = df['State/UT'].map(standardize_state_name)

# Task 3: Handle New State Formation

In [5]:
def update_state_formation(df, telangana_file, ladakh_districts=("Leh", "Kargil")):
    """Reassign districts to the newer states/UTs Telangana and Ladakh.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with "District" and "State/UT" columns. It is modified in place.
    telangana_file : str or path-like
        Text file listing the Telangana districts, one district name per line.
    ladakh_districts : iterable of str, optional
        District names to relabel as Ladakh. Defaults to Leh and Kargil.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, with "State/UT" updated for matching districts.

    Raises
    ------
    OSError
        If ``telangana_file`` cannot be opened.
    """
    # Read the district list from the path the caller supplied rather than
    # from a fixed location on one machine.
    with open(telangana_file, "r") as file:
        telangana_districts = file.read().splitlines()

    df.loc[df["District"].isin(telangana_districts), "State/UT"] = "Telangana"
    # list(...) lets callers pass any iterable (tuple, set, generator, ...).
    df.loc[df["District"].isin(list(ladakh_districts)), "State/UT"] = "Ladakh"
    return df

In [6]:
#df_ladakh = df[df["State/UT"] == "Ladakh"]
#df_ladakh

In [7]:
#df_Telangana = df[df["State/UT"] == "Telangana"]
#df_Telangana

# Task 4: Handle Missing Data

In [8]:
def _fill_missing_from(df, target, derived):
    """Fill gaps in ``df[target]`` from ``derived``; create the column if absent."""
    if target in df.columns:
        # Only NaN entries are replaced; values already present are kept.
        df[target] = df[target].fillna(derived)
    else:
        df[target] = derived


def process_missing_data(df):
    """Fill missing totals from their component columns and report what is left.

    Only missing entries are filled; totals already present are never
    overwritten. The rules, applied in order, are:

    * Population = Male + Female
    * Literate   = Literate_Male + Literate_Female
    * Population = Young_and_Adult + Middle_Aged + Senior_Citizen + Age_Not_Stated
      (used only where Population is still missing, and only when all four
      age-group values are present, so a partial sum is never stored)
    * Households = Households_Rural + Households_Urban

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing the component columns named above. Modified in place.

    Returns
    -------
    (pandas.DataFrame, pandas.Series)
        The same ``df`` object, and the percentage of missing values per
        column after filling.
    """
    age_columns = ['Young_and_Adult', 'Middle_Aged', 'Senior_Citizen', 'Age_Not_Stated']

    _fill_missing_from(df, 'Population', df['Male'] + df['Female'])
    _fill_missing_from(df, 'Literate', df['Literate_Male'] + df['Literate_Female'])
    # min_count makes the row sum NaN unless every age group is known.
    _fill_missing_from(
        df, 'Population', df[age_columns].sum(axis=1, min_count=len(age_columns))
    )
    _fill_missing_from(df, 'Households', df['Households_Rural'] + df['Households_Urban'])

    missing_data = df.isnull().mean() * 100
    return df, missing_data

# Task 5: Save Data to MongoDB

In [9]:
#pip install pymongo

In [21]:
import pandas as pd
from pymongo import MongoClient
from sqlalchemy import create_engine

In [22]:
# Part 1: Save Data to MongoDB (Task 5)
def save_to_mongodb(df):
    """Replace the contents of the ``census_data.census`` collection with ``df``.

    Connects to the MongoDB server at ``mongodb://localhost:27017/``, deletes
    every existing document in the collection and inserts one document per
    DataFrame row.

    Parameters
    ----------
    df : pandas.DataFrame
        Data to store; each row becomes one document keyed by column name.

    Notes
    -----
    Errors from the driver propagate to the caller. The client is always
    closed, including when an operation fails.
    """
    # Convert first, so a conversion failure cannot leave the collection wiped.
    records = df.to_dict('records')

    client = MongoClient('mongodb://localhost:27017/')
    try:
        collection = client['census_data']['census']

        # Clear any existing data in the collection (optional, based on use case)
        collection.delete_many({})

        # insert_many() rejects an empty list, so skip it for an empty frame.
        if records:
            collection.insert_many(records)
        print("Data saved to MongoDB.")
    finally:
        client.close()

In [23]:
# Part 2: Fetch Data from MongoDB (for Task 6)
def fetch_from_mongodb(include_id=True):
    """Load every document of the ``census_data.census`` collection into a DataFrame.

    Parameters
    ----------
    include_id : bool, optional
        When True (the default) the result keeps MongoDB's ``_id`` field as a
        column. Pass False to leave it out, e.g. before exporting the frame to
        another database where the Mongo identifier has no meaning.

    Returns
    -------
    pandas.DataFrame
        One row per document; empty if the collection has no documents.

    Notes
    -----
    Errors from the driver propagate to the caller. The client is always
    closed, including when the query fails.
    """
    client = MongoClient('mongodb://localhost:27017/')
    try:
        collection = client['census_data']['census']

        # A projection of None returns all fields; {'_id': 0} drops only _id.
        projection = None if include_id else {'_id': 0}
        data = pd.DataFrame(list(collection.find({}, projection)))
        print("Data fetched from MongoDB.")
        return data
    finally:
        client.close()

In [24]:
# Part 3: Upload Data to MySQL (Task 6)
def upload_to_mysql(df, username, password, database_name, host='localhost'):
    """Write ``df`` to the ``census_data`` table of a MySQL/MariaDB database.

    The table is replaced if it already exists. Failures are reported on
    stdout rather than raised, so a notebook run continues past this step.

    Parameters
    ----------
    df : pandas.DataFrame
        Data to upload.
    username, password : str
        Database credentials. They are URL-escaped before being placed in the
        connection URL, so characters such as ``@``, ``:`` or ``/`` are safe.
    database_name : str
        Name of an existing database (schema) on the server.
    host : str, optional
        Server host name, ``'localhost'`` by default.

    Returns
    -------
    bool
        True if the upload succeeded, False if connecting or uploading failed.
    """
    # Function-scope import keeps this block self-contained.
    from urllib.parse import quote_plus

    engine = None
    try:
        # Create SQLAlchemy engine to connect to MySQL
        engine = create_engine(
            f'mysql+pymysql://{quote_plus(username)}:{quote_plus(password)}'
            f'@{host}/{database_name}'
        )

        # Open and immediately release a connection purely as a connectivity
        # check, so connection problems are reported before the upload starts.
        with engine.connect():
            print("Connection to MySQL database successful.")

        # Replace table if it exists or create new table if not
        df.to_sql('census_data', con=engine, if_exists='replace', index=False)
        print("Data uploaded to MySQL successfully.")
        return True

    except Exception as e:
        # Deliberately best-effort: report the problem and signal failure.
        print(f"Error connecting to or uploading data to MySQL: {e}")
        return False

    finally:
        # Release pooled connections whether or not the upload succeeded.
        if engine is not None:
            engine.dispose()


In [27]:
# Main Execution
if __name__ == "__main__":
    import os
    from getpass import getpass

    # NOTE(review): this reloads the CSV from the same path as the raw dataset,
    # so the renaming / standardising / filling steps defined in earlier cells
    # are not reflected in what gets stored - confirm whether the cleaned
    # DataFrame should be saved instead.
    # Raw string: avoids the invalid "\C" / "\c" escapes in the Windows path.
    df = pd.read_csv(r"C:\Courses\census_2011.csv")

    # Task 5: Save data to MongoDB
    save_to_mongodb(df)

    # Task 6: Fetch data from MongoDB
    fetched_df = fetch_from_mongodb()

    # Task 6: Upload fetched data to MySQL.
    # Credentials come from the environment (or an interactive prompt) so that
    # no password is stored in the notebook.
    username = os.environ.get('MYSQL_USER', 'root')
    password = os.environ.get('MYSQL_PASSWORD') or getpass('MySQL password: ')
    database_name = os.environ.get('MYSQL_DATABASE', 'census_data')  # Ensure this database exists in MySQL

    upload_to_mysql(fetched_df, username, password, database_name)


Data saved to MongoDB.
Data fetched from MongoDB.
Error connecting to or uploading data to MySQL: (pymysql.err.OperationalError) (1130, "Host 'localhost' is not allowed to connect to this MariaDB server")
(Background on this error at: https://sqlalche.me/e/14/e3q8)


In [28]:
# Retry the upload with an already-open engine/connection, if one exists.
# No cell in this notebook defines `engine` or `connection` at module level
# (upload_to_mysql keeps its engine local), so on a fresh kernel these names
# do not exist. Look them up defensively so the cell reports "not established"
# instead of raising NameError from the `finally` clause.
engine = globals().get('engine')
connection = globals().get('connection')

try:
    # Upload DataFrame to SQL if connection was successful
    if connection is not None:
        df.to_sql('census_data', con=engine, if_exists='replace', index=False)
        print("DataFrame uploaded successfully.")
    else:
        print("Connection not established. DataFrame not uploaded.")
except Exception as e:
    print(f"Error uploading DataFrame: {e}")
finally:
    if connection is not None:  # Only close if the connection was established
        connection.close()

Connection not established. DataFrame not uploaded.
