<a href="https://colab.research.google.com/github/Vidhi2512002/Tech-bharat-ai-fellowship/blob/main/tech_bharat_ai_.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [13]:
import pandas as pd
from datetime import datetime
from google.colab import files

# ===== Agents =====

def ingestion_agent(path):
    """Read a CSV into a DataFrame and report its shape on stdout.

    Args:
        path: Passed straight to ``pd.read_csv`` (a file path or file-like object).

    Returns:
        The DataFrame produced by ``pd.read_csv``.
    """
    loaded = pd.read_csv(path)
    dims = loaded.shape
    print(f"[INGESTION] Loaded dataset with shape {dims}")
    return loaded

def standardization_agent(df):
    """Rewrite the column labels of ``df`` in a uniform form.

    Every label is stripped of surrounding whitespace, lower-cased, and has
    its spaces replaced with underscores. The labels are assigned back onto
    the frame that was passed in, and that same frame is returned.
    """
    normalized = []
    for label in df.columns:
        normalized.append(label.strip().lower().replace(" ", "_"))
    df.columns = normalized
    print("[STANDARDIZATION] Column names standardized.")
    return df

def cleaning_agent(df):
    """Drop redundant rows and fill the remaining missing values.

    Duplicate rows and rows whose every value is missing are removed first.
    Then each column is filled: numeric columns with that column's median,
    all other columns with the string 'Unknown'.

    Returns:
        The cleaned DataFrame.
    """
    df = df.drop_duplicates().dropna(how='all')
    for name in df.columns:
        column = df[name]
        if pd.api.types.is_numeric_dtype(column):
            filler = column.median()
        else:
            filler = 'Unknown'
        df[name] = column.fillna(filler)
    print("[CLEANING] Missing values handled, duplicates removed.")
    return df

def insight_agent(df):
    """Build a plain-text report for a cleaned frame.

    The report always starts with the frame's shape. When at least one
    column is numeric, the ``df.describe()`` table is appended under a
    "Numerical Summary" heading.

    Returns:
        The report lines joined with newlines.
    """
    report = [f"Shape after cleaning: {df.shape}"]

    has_numeric = False
    for name in df.columns:
        if pd.api.types.is_numeric_dtype(df[name]):
            has_numeric = True
            break

    if has_numeric:
        report += ["\nNumerical Summary:", df.describe().to_string()]

    print("[INSIGHT] Insights generated.")
    return "\n".join(report)

def recommendation_agent(insights):
    """Return the policy recommendation text.

    ``insights`` is accepted but not consulted: the same fixed sentence is
    returned on every call.
    """
    print("[RECOMMENDATION] Policy suggestion generated.")
    return "Based on detected trends, focus on districts with low coverage and improve resource allocation."

def logger_agent(log_content, log_dir="outputs/logs"):
    """Write ``log_content`` to a timestamped text file inside ``log_dir``.

    The directory (including missing parents) is created when absent. The
    file is named ``run_log_<YYYYmmdd_HHMMSS>.txt``; the timestamp has
    one-second resolution and the file is opened in write mode, so a second
    call within the same second replaces the earlier file.

    Args:
        log_content: Text to write.
        log_dir: Directory that receives the log file.

    Returns:
        None. The path of the written file is printed.
    """
    from pathlib import Path  # local import: this cell's import block does not bring in pathlib
    Path(log_dir).mkdir(parents=True, exist_ok=True)
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    log_file = f"{log_dir}/run_log_{timestamp}.txt"
    # Explicit UTF-8: without it the encoding is the platform default, which
    # can fail on (or garble) non-ASCII text in the report.
    with open(log_file, 'w', encoding="utf-8") as f:
        f.write(log_content)
    print(f"[LOGGER] Log saved at {log_file}")

# ===== Main Workflow for Colab =====


uploaded = files.upload()  # Drag-and-drop your Telangana Open Data file here
if not uploaded:
    # Without this guard an empty upload result surfaces as a bare
    # IndexError/StopIteration on the next line, which hides the real cause.
    raise ValueError("No file was uploaded; re-run this cell and choose a CSV file.")
# Only the first uploaded file is analysed.
DATA_PATH = next(iter(uploaded))


# Run the agents in order: load, rename columns, clean, summarise, recommend.
df = ingestion_agent(DATA_PATH)
df = standardization_agent(df)
df = cleaning_agent(df)
insights = insight_agent(df)
recommendations = recommendation_agent(insights)

# Persist the report text for this run.
log_content = f"=== RTGS AI Analyst Run ===\n\nINSIGHTS:\n{insights}\n\nRECOMMENDATIONS:\n{recommendations}"
logger_agent(log_content)


Saving agricultural_2019_4.csv to agricultural_2019_4 (2).csv
[INGESTION] Loaded dataset with shape (7465, 12)
[STANDARDIZATION] Column names standardized.
[CLEANING] Missing values handled, duplicates removed.
[INSIGHT] Insights generated.
[RECOMMENDATION] Policy suggestion generated.
[LOGGER] Log saved at outputs/logs/run_log_20250906_131219.txt


In [10]:
import pandas as pd

# Load the dataset
df_preview = pd.read_csv("consumption_detail_06_2021_domestic.csv")

# Show the column names, then the first 10 rows.
# sep="\n" places the list on its own line. Embedding the newline in the first
# argument instead lets print's default " " separator indent the list by one space.
print("Available Columns:", df_preview.columns.tolist(), sep="\n")
df_preview.head(10)


Available Columns:
 ['circle', 'division', 'subdivision', 'section', 'area', 'catdesc', 'catcode', 'totservices', 'billdservices', 'units', 'load']


Unnamed: 0,circle,division,subdivision,section,area,catdesc,catcode,totservices,billdservices,units,load
0,BANJARA HILLS,GREEN LANDS,GREENLANDS,BEGUMPET,ALLAMTHOTABAI,DOMESTIC,1,1134,1102.0,153243.0,1295.16
1,BANJARA HILLS,GREEN LANDS,GREENLANDS,BEGUMPET,ALLAMTHOTABAI APTS,DOMESTIC,1,798,790.0,248693.0,2911.33
2,BANJARA HILLS,GREEN LANDS,GREENLANDS,BEGUMPET,ALLAUDDIN BUILDINGS,DOMESTIC,1,1297,1230.0,236843.0,2426.14
3,BANJARA HILLS,GREEN LANDS,GREENLANDS,BEGUMPET,B.PET RAILWAY STATION,DOMESTIC,1,315,300.0,43302.0,335.62
4,BANJARA HILLS,GREEN LANDS,GREENLANDS,BEGUMPET,BEGUMPET BASTHI,DOMESTIC,1,769,732.0,116407.0,826.17
5,BANJARA HILLS,GREEN LANDS,GREENLANDS,BEGUMPET,BRAHMANWADI,DOMESTIC,1,1128,1056.0,175968.0,1287.53
6,BANJARA HILLS,GREEN LANDS,GREENLANDS,BEGUMPET,BRAHMANWADI APTS,DOMESTIC,1,1209,1162.0,329832.0,4003.84
7,BANJARA HILLS,GREEN LANDS,GREENLANDS,BEGUMPET,CHIKOTI GARDEN,DOMESTIC,1,173,153.0,70018.0,572.77
8,BANJARA HILLS,GREEN LANDS,GREENLANDS,BEGUMPET,CHIKOTIGADEN APTS,DOMESTIC,1,821,811.0,381566.0,4495.04
9,BANJARA HILLS,GREEN LANDS,GREENLANDS,BEGUMPET,DWARAKA DASS COLONY,DOMESTIC,1,1118,1107.0,170338.0,1566.17


In [11]:
import pandas as pd
from datetime import datetime
from pathlib import Path

# ===== Agents =====

def ingestion_agent(path):
    """Load the CSV found at ``path`` and announce the resulting shape.

    Args:
        path: Forwarded unchanged to ``pd.read_csv``.

    Returns:
        The loaded DataFrame.
    """
    dataset = pd.read_csv(path)
    shape = dataset.shape
    print(f"[INGESTION] Loaded dataset with shape {shape}")
    return dataset

def standardization_agent(df):
    """Give every column of ``df`` a trimmed, lower-case, underscore-separated label.

    The new labels are written onto the passed frame, which is then returned.
    """
    tidy = lambda label: label.strip().lower().replace(" ", "_")
    df.columns = list(map(tidy, df.columns))
    print("[STANDARDIZATION] Column names standardized.")
    return df

def cleaning_agent(df):
    """Remove duplicate and fully-empty rows, then fill what is still missing.

    Numeric columns are filled with their own median; every other column is
    filled with the string 'Unknown'.

    Returns:
        The cleaned DataFrame.
    """
    df = df.drop_duplicates().dropna(how='all')
    is_numeric = pd.api.types.is_numeric_dtype
    for name in df.columns:
        series = df[name]
        df[name] = series.fillna(series.median() if is_numeric(series) else 'Unknown')
    print("[CLEANING] Missing values handled, duplicates removed.")
    return df

def insight_agent(df, main_column="units"):
    """Summarise one numeric column and rank circles by its average.

    When ``main_column`` exists and is numeric, the report contains its mean,
    median and standard deviation. If a 'circle' column is also present, the
    five circles with the highest and lowest average of that column are
    listed. Otherwise a warning line is emitted, followed by the general
    ``df.describe()`` table.

    Args:
        df: Cleaned DataFrame to summarise.
        main_column: Label of the column to analyse.

    Returns:
        The report lines joined with newlines.
    """
    insights = [f"Shape after cleaning: {df.shape}"]

    has_column = main_column in df.columns
    if has_column and pd.api.types.is_numeric_dtype(df[main_column]):
        metric = df[main_column]
        insights.append(f"\nMain Metric Analyzed: {main_column}")
        insights.append(f"Mean: {metric.mean():.2f}")
        insights.append(f"Median: {metric.median():.2f}")
        insights.append(f"Std Dev: {metric.std():.2f}")

        if 'circle' in df.columns:
            # Aggregate once and reuse the result for both rankings.
            circle_means = df.groupby('circle')[main_column].mean()
            # Headings name the analysed column rather than a fixed "units".
            insights.append(f"\nTop 5 circles by average {main_column}:")
            insights.append(circle_means.nlargest(5).to_string())
            insights.append(f"\nBottom 5 circles by average {main_column}:")
            insights.append(circle_means.nsmallest(5).to_string())
    else:
        if has_column:
            # Mean/median/std on a non-numeric column would raise, so fall back.
            insights.append(
                f"\n[WARNING] Main column '{main_column}' is not numeric. Showing general numeric summary:"
            )
        else:
            insights.append("\n[WARNING] Main column not found. Showing general numeric summary:")
        # describe() raises on a frame that has no columns at all.
        if len(df.columns):
            insights.append(df.describe().to_string())

    print("[INSIGHT] Enhanced insights generated.")
    return "\n".join(insights)

def recommendation_agent(insights):
    """Produce the policy suggestion for the consumption analysis.

    The ``insights`` argument is not read; the returned sentence is fixed.
    """
    print("[RECOMMENDATION] Policy suggestion generated.")
    return "Focus on low-performing circles with low unit consumption for targeted energy efficiency programs."

def logger_agent(log_content, log_dir="outputs/logs"):
    """Write ``log_content`` to a timestamped text file inside ``log_dir``.

    The directory (including missing parents) is created when absent. The
    file is named ``run_log_<YYYYmmdd_HHMMSS>.txt``; the timestamp has
    one-second resolution and the file is opened in write mode, so a second
    call within the same second replaces the earlier file.

    Args:
        log_content: Text to write.
        log_dir: Directory that receives the log file.

    Returns:
        None. The path of the written file is printed.
    """
    Path(log_dir).mkdir(parents=True, exist_ok=True)
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    log_file = f"{log_dir}/run_log_{timestamp}.txt"
    # Explicit UTF-8: without it the encoding is the platform default, which
    # can fail on (or garble) non-ASCII text in the report.
    with open(log_file, 'w', encoding="utf-8") as f:
        f.write(log_content)
    print(f"[LOGGER] Log saved at {log_file}")



DATA_PATH = "consumption_detail_06_2021_domestic.csv"

# Load, normalise the headers and clean before any analysis.
df = ingestion_agent(DATA_PATH)
df = standardization_agent(df)
df = cleaning_agent(df)


main_column = "units"  # you can change if needed

insights = insight_agent(df, main_column)
recommendations = recommendation_agent(insights)

# Persist the report text for this run.
log_content = f"=== RTGS AI Analyst Run ===\n\nINSIGHTS:\n{insights}\n\nRECOMMENDATIONS:\n{recommendations}"
logger_agent(log_content)

# One f-string instead of two print arguments: print's default " " separator
# would otherwise put a stray space in front of the first report line.
print(f"\nFINAL INSIGHTS:\n{insights}")


[INGESTION] Loaded dataset with shape (14005, 11)
[STANDARDIZATION] Column names standardized.
[CLEANING] Missing values handled, duplicates removed.
[INSIGHT] Enhanced insights generated.
[RECOMMENDATION] Policy suggestion generated.
[LOGGER] Log saved at outputs/logs/run_log_20250906_130902.txt

FINAL INSIGHTS:
 Shape after cleaning: (14005, 11)

Main Metric Analyzed: units
Mean: 67676.21
Median: 26405.00
Std Dev: 112999.46

Top 5 circles by average units:
circle
CYBERCITY            180299.090110
BANJARA HILLS        163746.919315
HYDERABAD CENTRAL    132759.111860
MEDCHAL              127907.363514
SAROORNAGAR           95433.957027

Bottom 5 circles by average units:
circle
NAGARKURNOOL    16305.503704
VIKARABAD       18836.232278
MEDAK           21196.710623
WANAPARTHY      24024.489796
GADWAL          24457.473856


In [15]:
import pandas as pd
from datetime import datetime
from pathlib import Path

# ===== Agents =====

def ingestion_agent(path):
    """Load a CSV and print how many rows and columns were read.

    Args:
        path: Forwarded unchanged to ``pd.read_csv``.

    Returns:
        The loaded DataFrame.
    """
    frame = pd.read_csv(path)
    n_rows, n_cols = frame.shape
    print(f"[DATA INGESTION] Successfully loaded dataset: {n_rows} rows, {n_cols} columns")
    return frame

def standardization_agent(df):
    """Normalise the column labels of ``df``.

    Each label is trimmed, lower-cased and has its spaces turned into
    underscores. Only the labels are touched; they are assigned onto the
    passed frame, which is returned.
    """
    def _tidy(label):
        # strip, then lower, then replace: the same order for every label
        return label.strip().lower().replace(" ", "_")

    df.columns = [_tidy(label) for label in df.columns]
    print("[STANDARDIZATION] Column names and formats standardized.")
    return df

def cleaning_agent(df):
    """Deduplicate rows, drop all-missing rows, and fill remaining gaps.

    Numeric columns receive their own median as the fill value; any other
    column receives the string 'Unknown'.

    Returns:
        The cleaned DataFrame.
    """
    deduplicated = df.drop_duplicates()
    df = deduplicated.dropna(how='all')
    for label in df.columns:
        values = df[label]
        if pd.api.types.is_numeric_dtype(values):
            fill_with = values.median()
        else:
            fill_with = 'Unknown'
        df[label] = values.fillna(fill_with)
    print("[CLEANING] Missing values handled, duplicates removed.")
    return df

def insight_agent(df):
    """Summarise the main numeric metric and rank circles by its average.

    The metric is 'units' when that column exists and is numeric, otherwise
    the first numeric column. When the frame has no numeric column at all,
    the report contains only the shape and a warning line instead of raising.

    Args:
        df: Cleaned DataFrame to summarise.

    Returns:
        The report lines joined with newlines.
    """
    insights = [f"Shape after cleaning: {df.shape}"]
    numeric_cols = [col for col in df.columns if pd.api.types.is_numeric_dtype(df[col])]

    if 'units' in numeric_cols:
        main_metric = 'units'
    elif numeric_cols:
        main_metric = numeric_cols[0]
    else:
        # Indexing numeric_cols[0] here would raise IndexError; report the gap instead.
        insights.append("[WARNING] No numeric column found; metric summary skipped.")
        print("[ANALYSIS] Enhanced insights generated.")
        return "\n".join(insights)

    metric = df[main_metric]
    insights.append(f"Main Metric Analyzed: {main_metric}")
    insights.append(f"Mean: {metric.mean():,.2f}")
    insights.append(f"Median: {metric.median():,.2f}")
    insights.append(f"Std Dev: {metric.std():,.2f}")

    if 'circle' in df.columns:
        # Aggregate once and reuse the result for both rankings.
        circle_means = df.groupby('circle')[main_metric].mean()
        top = circle_means.sort_values(ascending=False).head(5)
        bottom = circle_means.sort_values().head(5)
        # Label the rankings with the analysed metric; renders as "Units" for 'units'.
        metric_label = str(main_metric).replace("_", " ").title()
        insights.append(f"\nTop 5 Circles by Average {metric_label}:\n" + top.to_string())
        insights.append(f"\nBottom 5 Circles by Average {metric_label}:\n" + bottom.to_string())

    print("[ANALYSIS] Enhanced insights generated.")
    return "\n".join(insights)

def recommendation_agent():
    """Return the two-line policy recommendation.

    The text is fixed: the function takes no input, so the zones it names are
    not derived from whichever dataset was analysed.
    NOTE(review): confirm the named zones match the dataset being reported on.
    """
    lines = (
        "Prioritize infrastructure optimization in high-consumption zones (e.g., Cybercity & Banjara Hills)",
        "and targeted demand-side management in low-consumption districts.",
    )
    print("[RECOMMENDATION] Policy suggestion generated.")
    return "\n".join(lines)

def logger_agent(log_content, log_dir="outputs/logs"):
    """Write ``log_content`` to a timestamped text file inside ``log_dir``.

    The directory (including missing parents) is created when absent. The
    file is named ``run_log_<YYYYmmdd_HHMMSS>.txt``; the timestamp has
    one-second resolution and the file is opened in write mode, so a second
    call within the same second replaces the earlier file.

    Args:
        log_content: Text to write.
        log_dir: Directory that receives the log file.

    Returns:
        None. The path of the written file is printed.
    """
    Path(log_dir).mkdir(parents=True, exist_ok=True)
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    log_file = f"{log_dir}/run_log_{timestamp}.txt"
    # Explicit UTF-8: without it the encoding is the platform default, which
    # can fail on (or garble) non-ASCII text in the report.
    with open(log_file, 'w', encoding="utf-8") as f:
        f.write(log_content)
    print(f"[LOG] Run details saved: {log_file}")

# ===== Execution =====
DATA_PATH = "agricultural_2019_4.csv"

# Pipeline: load, normalise headers, clean, then analyse and recommend.
df = (
    ingestion_agent(DATA_PATH)
    .pipe(standardization_agent)
    .pipe(cleaning_agent)
)
insights = insight_agent(df)
recommendations = recommendation_agent()

# Terminal-ready summary, emitted as one print call with newline-separated parts.
print(
    "\n" + "="*55,
    "TELANGANA RTGS ",
    "="*55,
    f"Dataset Size: {len(df)} records, {len(df.columns)} columns\n",
    insights,
    "\n--- POLICY RECOMMENDATION ---",
    recommendations,
    "="*55,
    sep="\n",
)

# Persist the same insights and recommendation text to the run log.
log_content = f"=== RTGS AI Analyst Run ===\n\nINSIGHTS:\n{insights}\n\nRECOMMENDATIONS:\n{recommendations}"
logger_agent(log_content)


[DATA INGESTION] Successfully loaded dataset: 7465 rows, 12 columns
[STANDARDIZATION] Column names and formats standardized.
[CLEANING] Missing values handled, duplicates removed.
[ANALYSIS] Enhanced insights generated.
[RECOMMENDATION] Policy suggestion generated.

TELANGANA RTGS 
Dataset Size: 7465 records, 12 columns

Shape after cleaning: (7465, 12)
Main Metric Analyzed: units
Mean: 277.37
Median: 0.00
Std Dev: 1,497.12

Top 5 Circles by Average Units:
circle
SECUNDERABAD      6422.500000
MEDCHAL           1526.713376
CYBERCITY          685.815126
SAROORNAGAR        586.723684
RAJENDRA NAGAR     458.548780

Bottom 5 Circles by Average Units:
circle
NAGARKURNOOL          38.987780
HYDERABAD CENTRAL     75.500000
HABSIGUDA             87.953757
MAHABOOBNAGAR        109.892902
YADADRI              136.605081

--- POLICY RECOMMENDATION ---
Prioritize infrastructure optimization in high-consumption zones (e.g., Cybercity & Banjara Hills)
and targeted demand-side management in low-consum