In [None]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pymssql

In [None]:

# Connection settings for the SQL Server database.
# Each value can be overridden through an environment variable; the literals
# are kept only as fallbacks so existing runs behave exactly as before.
database = os.environ.get("ARCTIC_DB_NAME", "arctic_analysts_capstone")
user = os.environ.get("ARCTIC_DB_USER", "arctic_analysts")
# SECURITY: a password is committed here in plain text. Rotate it, supply the
# new one through ARCTIC_DB_PASSWORD, and then delete this fallback literal.
password = os.environ.get("ARCTIC_DB_PASSWORD", "ThisPassw0rd!")
server = os.environ.get(
    "ARCTIC_DB_SERVER",
    "gen10-data-fundamentals-22-02-sql-server.database.windows.net",
)

def sql_query(query):
    """Run a SQL query against the configured database and return the result.

    Args:
        query: SQL text passed unchanged to ``pd.read_sql``.

    Returns:
        pandas.DataFrame: the rows produced by the query.

    A new connection is opened for every call (using the module-level
    ``server``, ``user``, ``password`` and ``database``) and is always closed
    before returning, including when the query raises.
    """
    conn = pymssql.connect(server, user, password, database)
    try:
        return pd.read_sql(query, conn)
    finally:
        # The original never closed the connection, leaking one per query.
        conn.close()

In [None]:
def run_queries():
    """Load every source table in full.

    Returns:
        list: the ``sql_query`` results for the tables ``year``, ``month``,
        ``county``, ``median_income`` and ``main_table``, in that order.
    """
    table_names = ("year", "month", "county", "median_income", "main_table")
    return [sql_query(f"SELECT * FROM {name}") for name in table_names]

In [None]:
def join_tables(all_df):
    """Outer-join the lookup tables onto the main table.

    Args:
        all_df: indexable collection whose items 0-4 are the year, month,
            county, median-income and main-table DataFrames (the order
            returned by ``run_queries``).

    Returns:
        pandas.DataFrame: the joined table, with negative ``MedianIncome``
        values replaced by missing values.

    Prints the row count after each of the four joins.
    """
    year_df, month_df, county_df, median_income_df, main_table = (
        all_df[position] for position in range(5)
    )

    # (lookup table, join key(s)) in the order the joins are applied.
    # Row counts noted by the original author after each step:
    # 5607, 5607, 5607, 20727.
    join_plan = (
        (year_df, "YearID"),
        (month_df, "MonthID"),
        (county_df, "FIPS"),
        (median_income_df, ["FIPS", "YearID"]),
    )

    master_table = main_table
    for lookup, keys in join_plan:
        master_table = master_table.merge(lookup, on=keys, how="outer")
        print(master_table.shape[0])

    # Negative incomes are treated as missing.
    negative_income = master_table["MedianIncome"] < 0
    master_table.loc[negative_income, "MedianIncome"] = None
    return master_table

In [None]:
# Pull every table from the database, then join them into a single frame.
all_df = run_queries()
master_table = join_tables(all_df)

In [None]:
# Summary statistics of the joined table.
master_table.describe()

In [None]:
# Number of rows per AgeGroup value.
master_table.AgeGroup.value_counts()

In [None]:
# Distinct Year values among the rows that have no AgeGroup.
master_table[(master_table.AgeGroup.isna())].Year.unique()

### Explanation
**What is happening?**
1. We need the main table that has all the data.
2. We will need our predicted data.
3. We are only trying to learn about 2020 through 2022.

**PLAN**
> 1. Filter the main table
> 2. Filter the predicted table
> 3. Combine the two tables.

**Note**
> The predicted data is yearly; the main data is monthly.

In [None]:
# reading in predictions
path = 'PredictedIncomeFinal.csv'
df = pd.read_csv(path)

# Take 'train_and_predicted' wherever it has a value and keep the existing
# 'MedianIncome' elsewhere. The column is assigned back explicitly because
# df['MedianIncome'].update(...) is a chained in-place update that does not
# reach `df` when pandas copy-on-write is active.
df['MedianIncome'] = df['train_and_predicted'].fillna(df['MedianIncome'])

# Keep only 2020 through 2022 and drop the helper column.
cleaned_predictions = df[(df.Year > 2019) & (df.Year < 2023)].drop(columns = ['train_and_predicted'])
cleaned_predictions['FIPS'] = cleaned_predictions['FIPS'].astype('str')

# Calculating monthly income
cleaned_predictions['MonthlyIncome'] = cleaned_predictions['MedianIncome'] / 12
cleaned_predictions = cleaned_predictions.drop(columns = ['MedianIncome'])
cleaned_predictions.head(3)

In [None]:
# Filtering master table to just the targeted data
# Rows with Year strictly between 2019 and 2023, restricted to the columns
# needed for the affordability calculation.
in_target_years = (master_table['Year'] > 2019) & (master_table['Year'] < 2023)
target_columns = ['FIPS','Year','AverageRate','AveragePoints','County','MedianHousePrice']
target_df = master_table.loc[in_target_years, target_columns]
target_df

In [None]:
# Merging predicted with actual
# The outer join on (Year, FIPS) keeps rows that appear in only one of the two tables.
# NOTE(review): cleaned_predictions['FIPS'] was cast to str; this assumes
# target_df['FIPS'] is a string column as well - confirm.
merged_tables = cleaned_predictions.merge(target_df, on = ['Year', 'FIPS'], how = 'outer')
merged_tables

#### Bringing in Hans's Calculation Code

In [None]:
# CALCULATIONS BASED ON 12% DOWNPAYMENT

# Monthly income is not recomputed here: merged_tables already carries a
# 'MonthlyIncome' column.

# calculate monthly mortgage payment
# https://www.educba.com/mortgage-formula/

final_table = merged_tables.copy()

# The original wrapped the statements below in `for row in final_table:`.
# Iterating a DataFrame yields its column labels, so the identical whole-column
# result was recomputed once per column. The arithmetic is already vectorised,
# so it is evaluated a single time here.
P = final_table['MedianHousePrice']-(final_table['MedianHousePrice']*.12)  # price minus the 12% down payment
r = (final_table['AverageRate']/100)  # AverageRate divided by 100 (percent -> fraction)
t = 30  # loan term in years
n = 12  # payments per year
monthly_tax = (final_table['MedianHousePrice']*.0189)/12  # 1.89% of the price per year, split over 12 months
growth = pow((1+(r/n)),(n*t))  # (1 + r/n) ** (n*t), used twice in the payment formula
final_table['MonthlyMortgage'] = (P * (((r/n) * growth) / (growth-1))) + monthly_tax


# mortgage to income ratio
final_table['mortgage_income_ratio'] = final_table['MonthlyMortgage']/final_table['MonthlyIncome']

#affordability determination
def affordable_condition(x):
    """Label a mortgage-to-income ratio.

    Args:
        x: numeric ratio; may be NaN.

    Returns:
        str: 'Missing' when x is NaN, 'Yes' when x <= .25, otherwise 'No'.
    """
    if np.isnan(x):
        return 'Missing'
    return 'Yes' if x <= .25 else 'No'

# Label every row's ratio with affordable_condition, then display the table.
final_table['affordable'] = final_table['mortgage_income_ratio'].apply(affordable_condition)
final_table

In [None]:
# Number of rows per affordability label.
final_table.affordable.value_counts()

In [None]:
# Number of rows per AgeGroup value in the final table.
final_table.AgeGroup.value_counts()

In [None]:
#final_table.to_csv('affordability_results.csv', index = False)

In [None]:
# Average house price, income and mortgage within each
# Year / FIPS / AgeGroup / County group.
group_keys = ['Year','FIPS','AgeGroup','County']
averaged_columns = ['MedianHousePrice','MonthlyIncome','MonthlyMortgage']
final_annual_df = final_table.groupby(by = group_keys)[averaged_columns].mean().reset_index()

# Recompute the ratio and its label from the averaged figures.
annual_ratio = final_annual_df['MonthlyMortgage']/final_annual_df['MonthlyIncome']
final_annual_df['mortgage_income_ratio'] = annual_ratio
final_annual_df['affordable'] = final_annual_df['mortgage_income_ratio'].apply(affordable_condition)
final_annual_df

In [None]:
# The original referenced `monthly_final_table`, which is not defined anywhere
# in this notebook and raises NameError on a fresh kernel.
# NOTE(review): `final_table` (the row-level table, as opposed to
# `final_annual_df`) is presumably the frame that was meant - confirm.
final_table.affordable.value_counts()

# Jed is working above this cell
### The cells below are the original version, which I have slightly adjusted, so they will not work as intended.

In [None]:
# year 2020-2022 aggregated
# One row per FIPS / Year / County holding the mean of the remaining columns.
predicted_years = master_table[master_table['Year'].isin([2020, 2021, 2022])]
target_df = predicted_years[['FIPS','Year','YearID','County','MedianHousePrice','AverageRate','AveragePoints']].groupby(by=['FIPS','Year','County']).agg('mean').reset_index()
target_df['FIPS'] = target_df['FIPS'].astype('str')


# reading in predictions
path = 'PredictedIncomeFinal.csv'
df = pd.read_csv(path)

# Take 'train_and_predicted' wherever it has a value and keep the existing
# 'MedianIncome' elsewhere. The column is assigned back explicitly because
# df['MedianIncome'].update(...) is a chained in-place update that does not
# reach `df` when pandas copy-on-write is active.
df['MedianIncome'] = df['train_and_predicted'].fillna(df['MedianIncome'])
# NOTE(review): this window runs through 2023 while target_df stops at 2022 - confirm the upper bound.
cleaned_predictions = df[(df.Year > 2019) & (df.Year < 2024)].drop(columns = ['train_and_predicted'])
cleaned_predictions['FIPS'] = cleaned_predictions['FIPS'].astype('str')
cleaned_predictions.head(3)


# list = []
# for row in df.index:
#     values_list = []
#     if pd.isna(df['FIPS'][row]) & (df['Year'][row] == 2020):
#         values_list.append(df['Year'][row])
#         values_list.append(int(df['FIPS'][row-1]))
#         values_list.append(df['AgeGroup'][row])
#         values_list.append(int(df['train_and_predicted'][row]))
#     elif pd.isna(df['FIPS'][row]) & (df['Year'][row] == 2021):
#         values_list.append(df['Year'][row])
#         values_list.append(int(df['FIPS'][row-2]))
#         values_list.append(df['AgeGroup'][row])
#         values_list.append(int(df['train_and_predicted'][row]))
#     elif pd.isna(df['FIPS'][row]) & (df['Year'][row] == 2022):
#         values_list.append(df['Year'][row])
#         values_list.append(int(df['FIPS'][row-3]))        
#         values_list.append(df['AgeGroup'][row])
#         values_list.append(int(df['train_and_predicted'][row]))
#     if len(values_list) > 0:
#         list.append(values_list)

In [None]:
# Nulls in the predicted MedianIncome:
# show the prediction rows whose MedianIncome is still missing.
cleaned_predictions[(cleaned_predictions.MedianIncome.isna())]

In [None]:
# columns = ['Year', 'FIPS', 'AgeGroup', 'MedianIncome']

# income_predictions = pd.DataFrame(data=list,columns=columns)

# Attach the income predictions to the yearly aggregates; the inner join keeps
# only the (Year, FIPS) pairs present in both tables.
main_predictions = target_df.merge(cleaned_predictions, how='inner', on=['Year','FIPS'])
main_predictions

In [None]:
# I don't think this is accurately brining the data in
#########################################################
# # adding income predictions to main table
# final_table = master_table.dropna()
# #final_table['FIPS'] = final_table['FIPS'].astype(int)
# final_table = final_table[['FIPS','Year','YearID','MonthID','MedianHousePrice', 'AverageRate', 'AveragePoints',
#        'MedianIncome', 'County', 'AgeGroup']]
# final_table = pd.concat([final_table,main_predictions])

In [None]:
# pd.merge raises MergeError when `left_on` is given without `right_on`; both
# frames use the same key names, so the join is expressed with `on`.
# Columns that exist in both frames would by default be renamed *_x / *_y,
# which would break the later lookups of 'MedianIncome', 'MedianHousePrice',
# 'AverageRate' and 'YearID'. The main_predictions columns therefore keep
# their names and master_table's duplicates get the '_master' suffix.
# NOTE(review): confirm the prediction-side columns are the ones the
# calculations should use, and that master_table['FIPS'] is a string column
# like main_predictions['FIPS'].
final_table = pd.merge(main_predictions, master_table, on = ['Year', 'FIPS'], suffixes = ('', '_master'))

In [None]:
# CALCULATIONS BASED ON 12% DOWNPAYMENT

# calculate monthly income
final_table['MonthlyIncome'] = final_table['MedianIncome']/12

# calculate monthly mortgage payment
# https://www.educba.com/mortgage-formula/

# The original wrapped the statements below in `for row in final_table:`.
# Iterating a DataFrame yields its column labels, so the identical whole-column
# result was recomputed once per column. The arithmetic is already vectorised,
# so it is evaluated a single time here.
P = final_table['MedianHousePrice']-(final_table['MedianHousePrice']*.12)  # price minus the 12% down payment
r = (final_table['AverageRate']/100)  # AverageRate divided by 100 (percent -> fraction)
t = 30  # loan term in years
n = 12  # payments per year
monthly_tax = (final_table['MedianHousePrice']*.0189)/12  # 1.89% of the price per year, split over 12 months
growth = pow((1+(r/n)),(n*t))  # (1 + r/n) ** (n*t), used twice in the payment formula
final_table['MonthlyMortgage'] = (P * (((r/n) * growth) / (growth-1))) + monthly_tax


# mortgage to income ratio
final_table['mortgage_income_ratio'] = final_table['MonthlyMortgage']/final_table['MonthlyIncome']

#affordability determination
def affordable_condition(x):
    """Classify a mortgage-to-income ratio as 'Yes', 'No' or 'Missing'.

    A ratio of at most .25 is 'Yes'; NaN is 'Missing'; anything else is 'No'.
    This repeats the definition given earlier in the notebook.
    """
    if x <= .25:
        return 'Yes'
    # NaN never satisfies the comparison above, so it is detected here.
    return 'Missing' if np.isnan(x) else 'No'

# Label every row's ratio with affordable_condition, then display the table.
final_table['affordable'] = final_table['mortgage_income_ratio'].apply(affordable_condition)
final_table

In [None]:
# IF NEEDED
# FILTERING OUT SOMMERSET BECAUSE WE DON'T HAVE INCOME PREDICTIONS FOR 2020-2022

# excluded_list = ['Somerset County']
# counties = master_table['County'].unique()
# included_counties = np.setdiff1d(counties, excluded_list)
# final_table = final_table[final_table['County'].isin(included_counties)]
# final_table

In [None]:
# Rows that have no MonthID.
final_table[(final_table.MonthID.isna())]

In [None]:
# Number of rows per affordability label.
final_table.affordable.value_counts()

In [None]:
# Count missing values per column, then list only the columns that have any,
# largest count first.
null_counts = final_table.isnull().sum()
null_counts[null_counts > 0].sort_values(ascending=False)

In [None]:
# Remove the YearID key column. errors='ignore' keeps the cell re-runnable:
# once YearID is gone, a second plain drop would raise KeyError.
final_table = final_table.drop(columns = ['YearID'], errors = 'ignore')
final_table

In [None]:
# Write the final table to 'affordability_results.csv' without the row index.
final_table.to_csv('affordability_results.csv', index = False)

In [None]:
# Rows for Year 2005 in the '65-plus' age group.
final_table[(final_table.Year == 2005) & (final_table.AgeGroup == '65-plus')]

In [None]:
# Prints the quotient 2002 / 2022.
print(2002/2022)

In [None]:
# Difference between 2022 and 2020.
2022-2020

In [None]:
# For each year number from 2000 through 2021, print its percentage increase
# relative to 2000.
for i in range(2000,2022):
    val = ((i/2000)-1)*100
    print(val)