In [5]:
import os
import json
import mysql.connector

# --- MySQL Connection ---
# Connection settings default to the original local-development values but can
# be overridden through environment variables, so credentials do not have to
# be edited into this script.
conn = mysql.connector.connect(
    host=os.environ.get("PHONEPE_DB_HOST", "localhost"),
    user=os.environ.get("PHONEPE_DB_USER", "root"),
    password=os.environ.get("PHONEPE_DB_PASSWORD", "1234"),
    database=os.environ.get("PHONEPE_DB_NAME", "phonepe_insights"),
)
# Single module-level cursor used by the insert_* functions in this script.
cursor = conn.cursor()

# --- Base Path of Dataset ---
# Root "data" directory of the dataset checkout.  Set PHONEPE_DATA_DIR to point
# at a different location; otherwise the original hard-coded path is used.
base_path = os.environ.get(
    "PHONEPE_DATA_DIR",
    r"C:\Users\sharan\Documents\phonepe_insights\pulse-master\data",
)

# ---------------------- AGGREGATED ----------------------

def insert_aggregated_user():
    """Load per-state aggregated user totals into ``aggregated_user``.

    Walks ``<base_path>/aggregated/user/country/india/state/<state>/<year>/``
    and inserts one row per ``*.json`` file: state, year, quarter,
    registered users and app opens.  State and year are the directory names
    (passed through as strings); the quarter is the file's base name parsed
    as an integer.

    Uses the module-level ``cursor`` and ``conn``.  All rows are committed
    together after the walk finishes.  Filesystem, JSON and MySQL errors
    propagate unchanged.

    Raises:
        ValueError: if a ``.json`` file's base name is not an integer.
        KeyError: if a file lacks ``data.aggregated.registeredUsers`` or
            ``data.aggregated.appOpens``.
    """
    path = os.path.join(base_path, "aggregated", "user", "country", "india", "state")
    for state in os.listdir(path):
        for year in os.listdir(os.path.join(path, state)):
            for filename in os.listdir(os.path.join(path, state, year)):
                if filename.endswith(".json"):
                    # Remove the extension itself; str.strip(".json") strips a
                    # *set of characters* from both ends and only produced the
                    # quarter by coincidence.
                    quarter = int(os.path.splitext(filename)[0])
                    # Binary mode lets json.load detect the file's Unicode
                    # encoding instead of using the platform locale default.
                    with open(os.path.join(path, state, year, filename), "rb") as f:
                        data = json.load(f)
                        totals = data["data"]["aggregated"]
                        reg_users = totals["registeredUsers"]
                        app_opens = totals["appOpens"]
                        cursor.execute("""
                            INSERT INTO aggregated_user (state, year, quarter, registered_users, app_opens)
                            VALUES (%s, %s, %s, %s, %s)
                        """, (state, year, quarter, reg_users, app_opens))
    conn.commit()

def insert_aggregated_transaction():
    """Load per-state aggregated transactions into ``aggregated_transaction``.

    Walks ``<base_path>/aggregated/transaction/country/india/state/<state>/<year>/``
    and, for every ``*.json`` file, inserts one row per entry of
    ``data.transactionData``: state, year, quarter, transaction name, count
    and amount.  Only the first element of each entry's
    ``paymentInstruments`` list is read.  State and year are the directory
    names; the quarter is the file's base name parsed as an integer.

    Uses the module-level ``cursor`` and ``conn``.  All rows are committed
    together after the walk finishes.  Filesystem, JSON and MySQL errors
    propagate unchanged.

    Raises:
        ValueError: if a ``.json`` file's base name is not an integer.
        KeyError, IndexError, TypeError: if a file does not have the
            expected structure.
    """
    path = os.path.join(base_path, "aggregated", "transaction", "country", "india", "state")
    for state in os.listdir(path):
        for year in os.listdir(os.path.join(path, state)):
            for filename in os.listdir(os.path.join(path, state, year)):
                if filename.endswith(".json"):
                    # Remove the extension itself; str.strip(".json") strips a
                    # *set of characters* from both ends and only produced the
                    # quarter by coincidence.
                    quarter = int(os.path.splitext(filename)[0])
                    # Binary mode lets json.load detect the file's Unicode
                    # encoding instead of using the platform locale default.
                    with open(os.path.join(path, state, year, filename), "rb") as f:
                        data = json.load(f)
                        for tx in data["data"]["transactionData"]:
                            name = tx["name"]
                            instrument = tx["paymentInstruments"][0]
                            count = instrument["count"]
                            amount = instrument["amount"]
                            cursor.execute("""
                                INSERT INTO aggregated_transaction (state, year, quarter, transaction_name, count, amount)
                                VALUES (%s, %s, %s, %s, %s, %s)
                            """, (state, year, quarter, name, count, amount))
    conn.commit()

def insert_aggregated_insurance():
    """Load per-state aggregated insurance figures into ``aggregated_insurance``.

    Walks ``<base_path>/aggregated/insurance/country/india/state/<state>/<year>/``
    and, for every ``*.json`` file, inserts one row per entry of
    ``data.transactionData``: state, year, quarter, name, count and amount.
    Only the first element of each entry's ``paymentInstruments`` list is
    read.  State and year are the directory names; the quarter is the file's
    base name parsed as an integer.

    Uses the module-level ``cursor`` and ``conn``.  All rows are committed
    together after the walk finishes.  Filesystem, JSON and MySQL errors
    propagate unchanged.

    Raises:
        ValueError: if a ``.json`` file's base name is not an integer.
        KeyError, IndexError, TypeError: if a file does not have the
            expected structure.
    """
    path = os.path.join(base_path, "aggregated", "insurance", "country", "india", "state")
    for state in os.listdir(path):
        for year in os.listdir(os.path.join(path, state)):
            for filename in os.listdir(os.path.join(path, state, year)):
                if filename.endswith(".json"):
                    # Remove the extension itself; str.strip(".json") strips a
                    # *set of characters* from both ends and only produced the
                    # quarter by coincidence.
                    quarter = int(os.path.splitext(filename)[0])
                    # Binary mode lets json.load detect the file's Unicode
                    # encoding instead of using the platform locale default.
                    with open(os.path.join(path, state, year, filename), "rb") as f:
                        data = json.load(f)
                        for tx in data["data"]["transactionData"]:
                            name = tx["name"]
                            instrument = tx["paymentInstruments"][0]
                            count = instrument["count"]
                            amount = instrument["amount"]
                            cursor.execute("""
                                INSERT INTO aggregated_insurance (state, year, quarter, name, count, amount)
                                VALUES (%s, %s, %s, %s, %s, %s)
                            """, (state, year, quarter, name, count, amount))
    conn.commit()

# ---------------------- MAP ----------------------

def insert_map_user():
    """Load district-level user figures into ``map_user``.

    Walks ``<base_path>/map/user/hover/country/india/state/<state>/<year>/``
    and, for every ``*.json`` file, inserts one row per item of the
    ``data.hoverData`` mapping.  Each key is stored as the district and its
    value supplies ``registeredUsers`` and ``appOpens``.  State and year are
    the directory names; the quarter is the file's base name parsed as an
    integer.

    Uses the module-level ``cursor`` and ``conn``.  All rows are committed
    together after the walk finishes.  Filesystem, JSON and MySQL errors
    propagate unchanged.

    Raises:
        ValueError: if a ``.json`` file's base name is not an integer.
        KeyError, AttributeError: if a file does not have the expected
            structure.
    """
    path = os.path.join(base_path, "map", "user", "hover", "country", "india", "state")
    for state in os.listdir(path):
        for year in os.listdir(os.path.join(path, state)):
            for filename in os.listdir(os.path.join(path, state, year)):
                if filename.endswith(".json"):
                    # Remove the extension itself; str.strip(".json") strips a
                    # *set of characters* from both ends and only produced the
                    # quarter by coincidence.
                    quarter = int(os.path.splitext(filename)[0])
                    # Binary mode lets json.load detect the file's Unicode
                    # encoding instead of using the platform locale default.
                    with open(os.path.join(path, state, year, filename), "rb") as f:
                        data = json.load(f)
                        for district, values in data["data"]["hoverData"].items():
                            cursor.execute("""
                                INSERT INTO map_user (state, district, year, quarter, registered_users, app_opens)
                                VALUES (%s, %s, %s, %s, %s, %s)
                            """, (state, district, year, quarter, values["registeredUsers"], values["appOpens"]))
    conn.commit()

def insert_map_transaction():
    """Load district-level transaction figures into ``map_transaction``.

    Walks ``<base_path>/map/transaction/hover/country/india/state/<state>/<year>/``
    and, for every ``*.json`` file, inserts one row per entry of
    ``data.hoverDataList``.  The entry's ``name`` is stored as the district,
    and count and amount come from the first element of its ``metric`` list.
    State and year are the directory names; the quarter is the file's base
    name parsed as an integer.

    Uses the module-level ``cursor`` and ``conn``.  All rows are committed
    together after the walk finishes.  Filesystem, JSON and MySQL errors
    propagate unchanged.

    Raises:
        ValueError: if a ``.json`` file's base name is not an integer.
        KeyError, IndexError, TypeError: if a file does not have the
            expected structure.
    """
    path = os.path.join(base_path, "map", "transaction", "hover", "country", "india", "state")
    for state in os.listdir(path):
        for year in os.listdir(os.path.join(path, state)):
            for filename in os.listdir(os.path.join(path, state, year)):
                if filename.endswith(".json"):
                    # Remove the extension itself; str.strip(".json") strips a
                    # *set of characters* from both ends and only produced the
                    # quarter by coincidence.
                    quarter = int(os.path.splitext(filename)[0])
                    # Binary mode lets json.load detect the file's Unicode
                    # encoding instead of using the platform locale default.
                    with open(os.path.join(path, state, year, filename), "rb") as f:
                        data = json.load(f)
                        for entry in data["data"]["hoverDataList"]:
                            district = entry["name"]
                            first_metric = entry["metric"][0]
                            count = first_metric["count"]
                            amount = first_metric["amount"]
                            cursor.execute("""
                                INSERT INTO map_transaction (state, district, year, quarter, count, amount)
                                VALUES (%s, %s, %s, %s, %s, %s)
                            """, (state, district, year, quarter, count, amount))
    conn.commit()

def insert_map_insurance():
    """Load district-level insurance figures into ``map_insurance``.

    Walks ``<base_path>/map/insurance/hover/country/india/state/<state>/<year>/``
    and, for every ``*.json`` file, inserts one row per entry of
    ``data.hoverDataList``.  The entry's ``name`` is stored as the district,
    and count and amount come from the first element of its ``metric`` list.
    State and year are the directory names; the quarter is the file's base
    name parsed as an integer.

    Uses the module-level ``cursor`` and ``conn``.  All rows are committed
    together after the walk finishes.  Filesystem, JSON and MySQL errors
    propagate unchanged.

    Raises:
        ValueError: if a ``.json`` file's base name is not an integer.
        KeyError, IndexError, TypeError: if a file does not have the
            expected structure.
    """
    path = os.path.join(base_path, "map", "insurance", "hover", "country", "india", "state")
    for state in os.listdir(path):
        for year in os.listdir(os.path.join(path, state)):
            for filename in os.listdir(os.path.join(path, state, year)):
                if filename.endswith(".json"):
                    # Remove the extension itself; str.strip(".json") strips a
                    # *set of characters* from both ends and only produced the
                    # quarter by coincidence.
                    quarter = int(os.path.splitext(filename)[0])
                    # Binary mode lets json.load detect the file's Unicode
                    # encoding instead of using the platform locale default.
                    with open(os.path.join(path, state, year, filename), "rb") as f:
                        data = json.load(f)
                        for entry in data["data"]["hoverDataList"]:
                            district = entry["name"]
                            first_metric = entry["metric"][0]
                            count = first_metric["count"]
                            amount = first_metric["amount"]
                            cursor.execute("""
                                INSERT INTO map_insurance (state, district, year, quarter, count, amount)
                                VALUES (%s, %s, %s, %s, %s, %s)
                            """, (state, district, year, quarter, count, amount))
    conn.commit()

# ---------------------- TOP ----------------------

def insert_top_user():
    """Load country-level top-user rankings into ``top_user``.

    Walks ``<base_path>/top/user/country/india/<year>/`` and, for every
    ``*.json`` file, inserts one row per entry of ``data.states``,
    ``data.districts`` and ``data.pincodes``, in that order.  Rows are tagged
    with entity type ``"state"``, ``"district"`` or ``"pincode"``.  For state
    rows the ``state`` column repeats the entry name; for district and
    pincode rows it is NULL.  The year is the directory name and the quarter
    is the file's base name parsed as an integer.

    Uses the module-level ``cursor`` and ``conn``.  All rows are committed
    together after the walk finishes.  Filesystem, JSON and MySQL errors
    propagate unchanged.

    Raises:
        ValueError: if a ``.json`` file's base name is not an integer.
        KeyError: if a file or entry lacks an expected key.
    """
    path = os.path.join(base_path, "top", "user", "country", "india")
    for year in os.listdir(path):
        for filename in os.listdir(os.path.join(path, year)):
            if filename.endswith(".json"):
                # Remove the extension itself; str.strip(".json") strips a
                # *set of characters* from both ends and only produced the
                # quarter by coincidence.
                quarter = int(os.path.splitext(filename)[0])
                # Binary mode lets json.load detect the file's Unicode
                # encoding instead of using the platform locale default.
                with open(os.path.join(path, year, filename), "rb") as f:
                    data = json.load(f)

                    # "or []" skips a null/empty section instead of raising,
                    # matching the guards in the other top_* loaders.
                    for state in data["data"]["states"] or []:
                        cursor.execute("""
                            INSERT INTO top_user (state, entity_name, entity_type, year, quarter, registered_users)
                            VALUES (%s, %s, %s, %s, %s, %s)
                        """, (state["name"], state["name"], "state", year, quarter, state["registeredUsers"]))

                    for district in data["data"]["districts"] or []:
                        cursor.execute("""
                            INSERT INTO top_user (state, entity_name, entity_type, year, quarter, registered_users)
                            VALUES (%s, %s, %s, %s, %s, %s)
                        """, (None, district["name"], "district", year, quarter, district["registeredUsers"]))

                    for pincode in data["data"]["pincodes"] or []:
                        cursor.execute("""
                            INSERT INTO top_user (state, entity_name, entity_type, year, quarter, registered_users)
                            VALUES (%s, %s, %s, %s, %s, %s)
                        """, (None, pincode["name"], "pincode", year, quarter, pincode["registeredUsers"]))
    conn.commit()

def insert_top_transaction():
    """Load per-state top transaction rankings into ``top_transaction``.

    Walks ``<base_path>/top/transaction/country/india/state/<state>/<year>/``
    and, for every ``*.json`` file, inserts one row per entry of
    ``data.districts`` (entity type ``"district"``) followed by one row per
    entry of ``data.pincodes`` (entity type ``"pincode"``).  Each row holds
    the entry's ``entityName`` plus ``metric.count`` and ``metric.amount``.
    A section that is empty or null is skipped.  State and year are the
    directory names; the quarter is the file's base name parsed as an
    integer.

    Uses the module-level ``cursor`` and ``conn``.  All rows are committed
    together after the walk finishes.  Filesystem, JSON and MySQL errors
    propagate unchanged.

    Raises:
        ValueError: if a ``.json`` file's base name is not an integer.
        KeyError: if a file or entry lacks an expected key.
    """
    path = os.path.join(base_path, "top", "transaction", "country", "india", "state")
    for state in os.listdir(path):
        for year in os.listdir(os.path.join(path, state)):
            for filename in os.listdir(os.path.join(path, state, year)):
                if filename.endswith(".json"):
                    # Remove the extension itself; str.strip(".json") strips a
                    # *set of characters* from both ends and only produced the
                    # quarter by coincidence.
                    quarter = int(os.path.splitext(filename)[0])
                    # Binary mode lets json.load detect the file's Unicode
                    # encoding instead of using the platform locale default.
                    with open(os.path.join(path, state, year, filename), "rb") as f:
                        data = json.load(f)

                        districts = data["data"]["districts"]
                        if districts:
                            for district in districts:
                                cursor.execute("""
                                    INSERT INTO top_transaction (state, entity_name, entity_type, year, quarter, count, amount)
                                    VALUES (%s, %s, %s, %s, %s, %s, %s)
                                """, (state, district["entityName"], "district", year, quarter,
                                      district["metric"]["count"], district["metric"]["amount"]))

                        pincodes = data["data"]["pincodes"]
                        if pincodes:
                            for pincode in pincodes:
                                cursor.execute("""
                                    INSERT INTO top_transaction (state, entity_name, entity_type, year, quarter, count, amount)
                                    VALUES (%s, %s, %s, %s, %s, %s, %s)
                                """, (state, pincode["entityName"], "pincode", year, quarter,
                                      pincode["metric"]["count"], pincode["metric"]["amount"]))
    conn.commit()

def insert_top_insurance():
    """Load per-state top insurance rankings into ``top_insurance``.

    Walks ``<base_path>/top/insurance/country/india/state/<state>/<year>/``
    and, for every ``*.json`` file, inserts one row per entry of
    ``data.districts`` (entity type ``"district"``) followed by one row per
    entry of ``data.pincodes`` (entity type ``"pincode"``).  Each row holds
    the entry's ``entityName`` plus ``metric.count`` and ``metric.amount``.
    A section that is empty or null is skipped.  State and year are the
    directory names; the quarter is the file's base name parsed as an
    integer.

    Uses the module-level ``cursor`` and ``conn``.  All rows are committed
    together after the walk finishes.  Filesystem, JSON and MySQL errors
    propagate unchanged.

    Raises:
        ValueError: if a ``.json`` file's base name is not an integer.
        KeyError: if a file or entry lacks an expected key.
    """
    path = os.path.join(base_path, "top", "insurance", "country", "india", "state")
    for state in os.listdir(path):
        for year in os.listdir(os.path.join(path, state)):
            for filename in os.listdir(os.path.join(path, state, year)):
                if filename.endswith(".json"):
                    # Remove the extension itself; str.strip(".json") strips a
                    # *set of characters* from both ends and only produced the
                    # quarter by coincidence.
                    quarter = int(os.path.splitext(filename)[0])
                    # Binary mode lets json.load detect the file's Unicode
                    # encoding instead of using the platform locale default.
                    with open(os.path.join(path, state, year, filename), "rb") as f:
                        data = json.load(f)

                        districts = data["data"]["districts"]
                        if districts:
                            for district in districts:
                                cursor.execute("""
                                    INSERT INTO top_insurance (state, entity_name, entity_type, year, quarter, count, amount)
                                    VALUES (%s, %s, %s, %s, %s, %s, %s)
                                """, (state, district["entityName"], "district", year, quarter,
                                      district["metric"]["count"], district["metric"]["amount"]))

                        pincodes = data["data"]["pincodes"]
                        if pincodes:
                            for pincode in pincodes:
                                cursor.execute("""
                                    INSERT INTO top_insurance (state, entity_name, entity_type, year, quarter, count, amount)
                                    VALUES (%s, %s, %s, %s, %s, %s, %s)
                                """, (state, pincode["entityName"], "pincode", year, quarter,
                                      pincode["metric"]["count"], pincode["metric"]["amount"]))
    conn.commit()

# ---------------------- RUN ALL ----------------------

# Run every loader in the original order.  The finally clause releases the
# cursor and connection even when a loader raises part-way through; the
# exception still propagates and the success message is then not printed.
try:
    insert_aggregated_user()
    insert_aggregated_transaction()
    insert_aggregated_insurance()

    insert_map_user()
    insert_map_transaction()
    insert_map_insurance()

    insert_top_user()
    insert_top_transaction()
    insert_top_insurance()
finally:
    cursor.close()
    conn.close()
print("All data inserted successfully!")


All data inserted successfully!
