In [90]:
from pathlib import Path
import json
import pandas as pd
import numpy as np

# Map Transactions

In [None]:
def Map_Transactions(root_dir="../data/map/transaction/hover/country/india/state"):
    """Flatten the map-transaction JSON files into one record per hover entry.

    Files are looked up recursively under ``root_dir``. The state and year are
    taken from the two directory levels above each file and the quarter from
    the file name (``<root_dir>/<state>/<year>/<quarter>.json``).

    Parameters
    ----------
    root_dir : str or Path, optional
        Directory that contains the per-state folders. Defaults to the path
        this notebook originally hard-coded.

    Returns
    -------
    list of dict
        One dict per element of ``data.hoverDataList`` with the keys
        ``quarter``, ``year``, ``state``, ``district_name``, ``type``,
        ``count``, ``amount`` (string formatted with two decimals) and
        ``timestamp`` (the file's raw ``responseTimestamp``).
    """
    root_dir = Path(root_dir)

    records = []
    # rglob() yields entries in filesystem order; sorting keeps the record
    # order (and therefore the row position used as DB id later) stable
    # across runs and machines.
    for json_path in sorted(root_dir.rglob("*.json")):
        with open(json_path, 'r', encoding='utf-8') as file:
            dataset = json.load(file)

        state = json_path.parent.parent.name
        year = json_path.parent.name
        quarter = f'Q{json_path.stem}'

        # A file without hover entries (null list) simply contributes no rows.
        for entry in dataset['data']['hoverDataList'] or []:
            # Only the first metric of each entry is used.
            metric = entry['metric'][0]

            # amount format with 2 decimals
            amount_value = metric['amount']
            amount_formatted = f'{amount_value: .2f}'

            records.append(dict(quarter=quarter,
                                year=year,
                                state=state,
                                district_name=entry['name'],
                                type=metric['type'],
                                count=metric['count'],
                                amount=amount_formatted,
                                timestamp=dataset['responseTimestamp']))

    return records

In [None]:
# Build the transactions frame: numeric year/amount, and the epoch-millisecond
# timestamp rendered as a "YYYY-MM-DD HH:MM:SS" string.
MapTransactions = (
    pd.DataFrame(Map_Transactions())
    .astype({'year': int, 'amount': float})
    .assign(
        timestamp=lambda frame: pd.to_datetime(frame['timestamp'], unit='ms')
        .dt.strftime("%Y-%m-%d %H:%M:%S")
    )
)
MapTransactions

In [None]:
import mysql.connector
from tabulate import tabulate

In [83]:
def MyCursor():
    """Open a new MySQL connection and return it together with a buffered cursor.

    Connection settings can be overridden through the environment variables
    ``PULSE_DB_HOST``, ``PULSE_DB_USER``, ``PULSE_DB_PASSWORD`` and
    ``PULSE_DB_NAME``; when a variable is unset the notebook's original value
    is used.

    Returns
    -------
    tuple
        ``(db, mycursor)``: the connection and a cursor created with
        ``buffered=True``. Every call opens a new connection, so the caller is
        responsible for closing both.
    """
    import os  # local import so this cell does not depend on the import cell

    db = mysql.connector.connect(
        host=os.environ.get("PULSE_DB_HOST", "localhost"),
        user=os.environ.get("PULSE_DB_USER", "root"),
        # NOTE(review): the literal fallback is kept only so existing runs keep
        # working; prefer setting PULSE_DB_PASSWORD and removing the default.
        password=os.environ.get("PULSE_DB_PASSWORD", "balaji"),
        database=os.environ.get("PULSE_DB_NAME", "pulse"),
    )

    mycursor = db.cursor(buffered=True)
    return db, mycursor

In [None]:
# Open the notebook-wide connection through MyCursor() instead of repeating
# the connection settings (and the password) a second time.
db, mycursor = MyCursor()
mycursor

In [None]:
# Display the prepared transactions frame (the last expression of a cell is rendered).
MapTransactions

In [None]:
# Create the transactions table if it is missing.
# - The name is written in lower case to match every later statement
#   (insert/select use `mapdatatrans`); table names are case-sensitive on
#   MySQL servers running on case-sensitive file systems.
# - `count` is bigint and `amount` is double so large values are not
#   truncated or rounded to single precision.
# - Integer display widths (int(10), int(255)) are dropped; they do not
#   affect the stored range.
mycursor.execute("""create table if not exists mapdatatrans (
                 mapDataTransId int auto_increment primary key, quarter varchar(10), year int,
                 state varchar(255), district_name varchar(255), type varchar(10), count bigint,
                 amount double, timestamp datetime) """)

In [None]:
# List the tables of the connected database as a psql-style text table,
# using the column names reported by the cursor as headers.
mycursor.execute("show tables")
out = mycursor.fetchall()

column_names = [column[0] for column in mycursor.description]
print(tabulate(out, headers=column_names, tablefmt='psql'))

In [None]:
# Upsert every DataFrame row into `mapdatatrans`, keyed by the row position.
sql = ("""insert into mapdatatrans (mapDataTransId, quarter, year, state, district_name, type, count,
       amount, timestamp) values (%s, %s, %s, %s, %s, %s, %s, %s, %s)
       on duplicate key update
       quarter = values(quarter), year= values(year), state = values(state),
       district_name = values(district_name), type = values(type), count = values(count),
       amount = values(amount), timestamp = values(timestamp)""")

# to_records() puts the 0-based index first in each tuple. MySQL (unless
# NO_AUTO_VALUE_ON_ZERO is set) treats 0 in an AUTO_INCREMENT column as
# "generate the next id", so sending the index as-is lets the second row
# overwrite the first one and appends a stray copy on every re-run.
# Shifting the ids to start at 1 keeps the upsert one-to-one and repeatable.
for index, *values in MapTransactions.to_records().tolist():
    mycursor.execute(sql, (index + 1, *values))

In [None]:
# Commit the statements executed on this connection so far.
db.commit()

In [106]:


def StateTransactions(QUARTER, YEAR):
    """Return the ten states with the highest transaction count for one quarter.

    Parameters
    ----------
    QUARTER : str
        Quarter label as stored in the table, e.g. ``'Q3'``.
    YEAR : int
        Year to select, e.g. ``2023``.

    Returns
    -------
    pandas.DataFrame
        At most ten rows sorted by descending count, with the columns
        ``quarter``, ``year``, ``state``, ``Transactions_Count`` (int64),
        ``Rank`` and ``Transacations_Count`` (the count formatted in crore
        or lakh).
    """
    def convert_to_crore_lakh(value):
        # 1 crore = 10,000,000; anything smaller is expressed in lakh (100,000).
        if value >= 10000000:
            return f'{value /10000000 : .2f} Cr'
        else:
            return f'{value / 100000 : .2f} L'

    db, mycursor = MyCursor()
    try:
        mycursor.execute("""select
        quarter,
        year,
        state,
        sum(count) as count
        from mapdatatrans 
        group by quarter, year, state""")
        data = list(mycursor.fetchall())
    finally:
        # MyCursor() opens a new connection on every call; release it here
        # instead of leaking one connection per invocation.
        mycursor.close()
        db.close()

    columns = ['quarter', 'year', 'state', 'Transactions_Count']
    totals = pd.DataFrame(data=data, columns=columns)
    totals['Transactions_Count'] = totals['Transactions_Count'].astype(np.int64)

    # .copy() so the column assignments below act on an independent frame
    # rather than on a filtered view (avoids SettingWithCopyWarning).
    selected = totals[(totals['year'] == YEAR) & (totals['quarter'] == QUARTER)].copy()
    selected['Rank'] = selected['Transactions_Count'].rank(ascending=False)
    selected = (
        selected
        .sort_values(by=['Transactions_Count', 'Rank'], ascending=[False, True])
        .reset_index(drop=True)
    )
    # NOTE(review): 'Transacations_Count' is misspelled, so the formatted value
    # lands in an extra column next to the numeric one. The name is kept
    # because callers may already read it - confirm before renaming.
    selected['Transacations_Count'] = selected['Transactions_Count'].apply(convert_to_crore_lakh)

    return selected.head(10)


In [107]:
# Top ten states by transaction count for Q3 of 2023.
StateTransactions('Q3', 2023)

Unnamed: 0,quarter,year,state,Transactions_Count,Rank,Transacations_Count
0,Q3,2023,karnataka,2369262302,1.0,236.93 Cr
1,Q3,2023,maharashtra,2290678587,2.0,229.07 Cr
2,Q3,2023,telangana,1875045924,3.0,187.50 Cr
3,Q3,2023,andhra-pradesh,1475164403,4.0,147.52 Cr
4,Q3,2023,uttar-pradesh,1354622466,5.0,135.46 Cr
5,Q3,2023,rajasthan,1237232167,6.0,123.72 Cr
6,Q3,2023,madhya-pradesh,978812593,7.0,97.88 Cr
7,Q3,2023,bihar,734238751,8.0,73.42 Cr
8,Q3,2023,west-bengal,632093191,9.0,63.21 Cr
9,Q3,2023,odisha,622397603,10.0,62.24 Cr


# Map Users

In [None]:
def Map_Users(root_dir="../data/map/user/hover/country/india/state"):
    """Flatten the map-user JSON files into one record per district.

    Files are looked up recursively under ``root_dir``. The state and year are
    taken from the two directory levels above each file and the quarter from
    the file name (``<root_dir>/<state>/<year>/<quarter>.json``).

    Parameters
    ----------
    root_dir : str or Path, optional
        Directory that contains the per-state folders. Defaults to the path
        this notebook originally hard-coded.

    Returns
    -------
    list of dict
        One dict per key of ``data.hoverData`` with the keys ``quarter``,
        ``year``, ``state``, ``district_name``, ``registeredUsers``,
        ``appOpens`` and ``timestamp`` (the file's raw ``responseTimestamp``).
    """
    root_dir = Path(root_dir)

    records = []
    # rglob() yields entries in filesystem order; sorting keeps the record
    # order (and therefore the row position used as DB id later) stable
    # across runs and machines.
    for json_path in sorted(root_dir.rglob("*.json")):
        with open(json_path, "r", encoding="utf-8") as file:
            dataset = json.load(file)

        state = json_path.parent.parent.name
        year = json_path.parent.name
        quarter = f'Q{json_path.stem}'

        # hoverData maps district name -> district figures; a null mapping
        # simply contributes no rows.
        hover_data = dataset['data']['hoverData'] or {}
        for district_name, district_data in hover_data.items():
            records.append(dict(quarter=quarter,
                                year=year,
                                state=state,
                                # key was misspelled 'disrict_name'; it now matches
                                # Map_Transactions and the database column
                                district_name=district_name,
                                registeredUsers=district_data['registeredUsers'],
                                appOpens=district_data['appOpens'],
                                timestamp=dataset['responseTimestamp']))

    return records

In [None]:
# Build the users frame: integer year, and the epoch-millisecond timestamp
# rendered as a "YYYY-MM-DD HH:MM:SS" string; then print the frame summary.
MapUsers = (
    pd.DataFrame(Map_Users())
    .astype({'year': int})
    .assign(
        timestamp=lambda frame: pd.to_datetime(frame['timestamp'], unit='ms')
        .dt.strftime("%Y-%m-%d %H:%M:%S")
    )
)
MapUsers.info()

In [None]:
# Create the users table if it is missing: an auto-increment id plus one
# column per field produced by Map_Users() (quarter, year, state, district,
# registered users, app opens, timestamp).
mycursor.execute("""create table if not exists mapdatausers (
                 mapDataUsersId int auto_increment primary key, quarter varchar(10), year int(10),
                 state varchar(255), district_name varchar(255), registeredUsers bigint, 
                 appOpens bigint, timestamp datetime)""")

In [None]:
# Upsert every DataFrame row into `mapdatausers`, keyed by the row position.
sql = ("""insert into mapdatausers (mapDataUsersId, quarter, year, state, district_name,
       registeredUsers, appOpens, timestamp) values(%s, %s, %s, %s, %s, %s, %s, %s)
       on duplicate key update
       quarter = values(quarter), year = values(year), state = values(state),
       district_name = values(district_name), registeredUsers = values(registeredUsers),
       appOpens = values(appOpens), timestamp = values(timestamp)""")

# to_records() puts the 0-based index first in each tuple. MySQL (unless
# NO_AUTO_VALUE_ON_ZERO is set) treats 0 in an AUTO_INCREMENT column as
# "generate the next id", so sending the index as-is lets the second row
# overwrite the first one and appends a stray copy on every re-run.
# Shifting the ids to start at 1 keeps the upsert one-to-one and repeatable.
for index, *values in MapUsers.to_records().tolist():
    mycursor.execute(sql, (index + 1, *values))

In [None]:
# Commit the statements executed on this connection so far.
db.commit()

In [104]:
def StateUsers(QUARTER, YEAR):
    """Return the ten states with the most registered users for one quarter.

    Parameters
    ----------
    QUARTER : str
        Quarter label as stored in the table, e.g. ``'Q1'``.
    YEAR : int
        Year to select, e.g. ``2023``.

    Returns
    -------
    pandas.DataFrame
        At most ten rows sorted by descending user count, with the columns
        ``quarter``, ``year``, ``state`` and ``registeredUsers`` (formatted
        in crore or lakh).
    """
    def convert_to_crore_lakh(value):
        # 1 crore = 10,000,000; anything smaller is expressed in lakh (100,000).
        if value >= 10000000:
            return f'{value / 10000000 : .2f} Cr'
        else:
            return f'{value / 100000: .2f} L'

    db, mycursor = MyCursor()
    try:
        mycursor.execute("""select
        quarter,
        year,
        state,
        sum(registeredUsers) as registeredUsers
        from mapdatausers
        group by quarter, year, state;""")
        data = list(mycursor.fetchall())
    finally:
        # MyCursor() opens a new connection on every call; release it here
        # instead of leaking one connection per invocation.
        mycursor.close()
        db.close()

    columns = ['quarter', 'year', 'state', 'registeredUsers']
    totals = pd.DataFrame(data=data, columns=columns)
    # Same normalisation as StateTransactions: make the summed column a plain
    # int64 so ranking and sorting run on a numeric dtype.
    totals['registeredUsers'] = totals['registeredUsers'].astype(np.int64)

    # .copy() so the column assignments below act on an independent frame
    # rather than on a filtered view (avoids SettingWithCopyWarning).
    selected = totals[(totals['quarter'] == QUARTER) & (totals['year'] == YEAR)].copy()
    selected['Rank'] = selected['registeredUsers'].rank(ascending=False)
    selected = (
        selected
        .sort_values(by=['registeredUsers', 'Rank'], ascending=[False, True])
        .reset_index(drop=True)
    )
    selected['registeredUsers'] = selected['registeredUsers'].apply(convert_to_crore_lakh)
    # Rank was only needed as a sort tie-breaker; it is not part of the result.
    selected = selected.drop(columns=['Rank'])

    return selected.head(10)

In [105]:
# Top ten states by registered users for Q1 of 2023.
StateUsers('Q1', 2023)

Unnamed: 0,quarter,year,state,registeredUsers
0,Q1,2023,maharashtra,5.85 Cr
1,Q1,2023,uttar-pradesh,4.78 Cr
2,Q1,2023,karnataka,3.77 Cr
3,Q1,2023,andhra-pradesh,2.91 Cr
4,Q1,2023,rajasthan,2.85 Cr
5,Q1,2023,telangana,2.68 Cr
6,Q1,2023,west-bengal,2.67 Cr
7,Q1,2023,tamil-nadu,2.63 Cr
8,Q1,2023,madhya-pradesh,2.40 Cr
9,Q1,2023,gujarat,2.34 Cr
