In [1]:
import pandas as pd
import numpy as np

In [2]:
# Origin column names; getRatioData divides each of these by "Foreign".
Origin = [
    'Europe',
    'Asia',
    'Africa',
    'Oceania',
    'Americas',
]

# Years to load, kept as strings because they are spliced into the CSV file
# names and stored in the YEAR column.
years = [
    "1970",
    "1980",
    "1990",
    "2000",
    "2010",
]

In [3]:
def getRatioData(df, origins=None):
    """Add derived ratio columns to ``df`` and return it.

    The frame is modified in place (columns are appended in a fixed order)
    and the very same object is returned, so both ``getRatioData(df)`` and
    ``df = getRatioData(df)`` work.

    Scales used by the new columns:
      * share columns (``Male_Ratio``, ``Urban_Ratio``, ``W_Ratio``,
        ``Native_Ratio``, ``<origin>_Ratio``, ``Male_5_Ratio``, ...) are
        plain fractions of their denominator;
      * ``Sex_Ratio``, ``Location_Ratio``, ``Race_Ratio``, ``Origin_Ratio``,
        ``Poverty_Ratio``, ``Educated_Sex_Ratio`` and
        ``College_Educated_Ratio`` are scaled to "per 1000";
      * ``Housing_Ratio`` is people per housing unit.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the count columns read below (``Total_Population``,
        ``Male``, ``Female``, ``Urban``, ``Suburban``, ``Rural``, ``W``,
        ``AA``, ``AI``, ``APAC``, ``Native``, ``Foreign``, ``Poverty``,
        ``HousingUnits``, ``Male_*``/``Female_*`` for the education levels
        ``5, 8, 11, A, B, G``) plus one column per entry of ``origins``.
    origins : iterable of str, optional
        Column names whose share of ``Foreign`` is computed. Defaults to the
        module-level ``Origin`` list, which keeps existing callers unchanged.

    Returns
    -------
    pandas.DataFrame
        ``df`` itself, with the ratio columns added.

    Notes
    -----
    Zero denominators are not guarded against. A missing input column
    surfaces as the ``KeyError`` raised by the column lookup.
    """
    if origins is None:
        # Fall back to the module-level list so existing calls behave as before.
        origins = Origin

    total = df["Total_Population"]

    # Gender Based (shares of the total population)
    df["Male_Ratio"] = df["Male"] / total
    df["Female_Ratio"] = df["Female"] / total
    # Read as Males per 1000 Females
    df["Sex_Ratio"] = (df["Male"] * 1000) / df["Female"]

    # Location Based (shares of the total population)
    df["Urban_Ratio"] = df["Urban"] / total
    df["Suburban_Ratio"] = df["Suburban"] / total
    df["Rural_Ratio"] = df["Rural"] / total
    # Read as Rural people per 1000 Urban + Suburban people
    df["Location_Ratio"] = (df["Rural"] * 1000) / (df["Urban"] + df["Suburban"])

    # Race Based (shares of the total population)
    df['W_Ratio'] = df['W'] / total
    df['AA_Ratio'] = df['AA'] / total
    df['AI_Ratio'] = df['AI'] / total
    df['APAC_Ratio'] = df['APAC'] / total
    # Read as No. of People of other races per 1000 Whites
    df['Race_Ratio'] = ((df['AA'] + df['AI'] + df['APAC']) * 1000) / df['W']

    # Origin Based (shares of the total population)
    df["Native_Ratio"] = df["Native"] / total
    df["Foreign_Ratio"] = df["Foreign"] / total
    # Read as No. of Foreign per 1000 Natives
    df["Origin_Ratio"] = (df["Foreign"] * 1000) / df["Native"]

    # Share of the Foreign count coming from each origin.
    # This is a plain fraction, NOT scaled to per 1000.
    for origin in origins:
        df[origin + '_Ratio'] = df[origin] / df["Foreign"]

    # No of People below Poverty Level per 1000 People
    df["Poverty_Ratio"] = (df["Poverty"] * 1000) / total

    # People per Housing Units
    df["Housing_Ratio"] = total / df["HousingUnits"]

    # Gender-Education Based: each level as a share of that gender's
    # education total (the sum of the six level columns).
    tempList = ['5', '8', '11', 'A', 'B', 'G']
    # Male
    totalMale = df["Male_5"] + df["Male_8"] + df["Male_11"] + df["Male_A"] + df["Male_B"] + df["Male_G"]
    for level in tempList:
        df["Male_" + level + "_Ratio"] = df["Male_" + level] / totalMale

    # Female
    totalFemale = df["Female_5"] + df["Female_8"] + df["Female_11"] + df["Female_A"] + df["Female_B"] + df["Female_G"]
    for level in tempList:
        df["Female_" + level + "_Ratio"] = df["Female_" + level] / totalFemale

    # Read as College Educated Females per 1000 College Educated Males
    # (the A, B and G levels are the ones counted as college educated here)
    educatedFemales = df["Female_A"] + df["Female_B"] + df["Female_G"]
    educatedMales = df["Male_A"] + df["Male_B"] + df["Male_G"]
    df["Educated_Sex_Ratio"] = (educatedFemales * 1000) / educatedMales

    # Read as College Educated people per 1000 people covered by the
    # education columns
    df["College_Educated_Ratio"] = ((educatedFemales + educatedMales) * 1000) / (totalFemale + totalMale)

    return df

In [4]:
# Columns requested from every yearly CSV (passed to read_csv as usecols).
# Built from grouped sub-lists; the flattened order is unchanged.
columns = (
    # Population total and location split
    ['Total_Population', 'Urban', 'Suburban', 'Rural']
    # Gender
    + ['Male', 'Female']
    # Race
    + ['W', 'AA', 'AI', 'APAC']
    # Native / foreign counts and the origin breakdown
    + ['Native', 'Foreign']
    + ['Europe', 'Asia', 'Africa', 'Oceania', 'Americas']
    # Education levels per gender
    + ['Male_5', 'Male_8', 'Male_11', 'Male_A', 'Male_B', 'Male_G']
    + ['Female_5', 'Female_8', 'Female_11', 'Female_A', 'Female_B', 'Female_G']
    # Household, family and income columns
    + ['Household_1', 'Household_2', 'Household_3', 'Household_4']
    + ['MedianIncome']
    + ['Family_1', 'Family_2', 'Family_3', 'Family_4', 'Family_5']
    + ['PerCapitaIncome']
    # Poverty count and housing units
    + ['Poverty', 'HousingUnits']
)

In [5]:
# Build one aggregate row per year, then combine them with a single concat
# instead of re-concatenating a growing DataFrame on every iteration.
yearlyTotals = []
for year in years:

    # Read CSV using pandas (only the columns listed in `columns`)
    df = pd.read_csv('flaskDirectory/NayaWalaDataset/Data' + year + '.csv', usecols=columns)

    # Get the Overall Sum (column-wise, over all rows of the file)
    tempdf = df.sum(axis=0)

    # Get it in correct shape: a one-row frame with one column per field
    tempdf = pd.DataFrame(tempdf)
    tempdf = tempdf.T
    tempdf["YEAR"] = str(year)

    yearlyTotals.append(tempdf)

# Reverse so the last year read comes first, matching the row order produced
# when each new row was prepended to the running frame. An empty `years`
# still yields an empty DataFrame rather than a concat error.
if yearlyTotals:
    dataFrame = pd.concat(yearlyTotals[::-1], axis=0)
else:
    dataFrame = pd.DataFrame()

In [6]:
# Add the ratio columns (same function as in Getting_Year_Wise_Data), then
# make YEAR the row index.
dataFrame = getRatioData(dataFrame).set_index('YEAR')

In [9]:
# Write the aggregate table, including its index, to CSV.
dataFrame.to_csv(path_or_buf='flaskDirectory/NayaWalaDataset/Aggregate_Data.csv')