In [None]:
import numpy as np
import pandas as pd
from scipy.stats import ttest_ind

def get_list_of_university_towns():
    '''Returns a DataFrame of towns and the states they are in from the
    university_towns.txt list. The format of the DataFrame is:
    DataFrame( [ ["Michigan", "Ann Arbor"], ["Michigan", "Ypsilanti"] ],
    columns=["State", "RegionName"]  )

    The file is read line by line from the current working directory:

    1. A line containing the marker "[edit]" is a state header; the state
       name is everything before the first "[".
    2. Any other non-blank line is a town of the most recent state; the
       town name is everything before the first "(", with surrounding
       whitespace (including the trailing newline) stripped.
    3. Blank lines are skipped.

    A town name of the form "Champaign<punctuation>Urbana" is normalised to
    "Champaign Urbana".

    Returns:
        pandas.DataFrame with columns ["State", "RegionName"], one row per
        town, in file order.

    Raises:
        ValueError: if a town line appears before any state header.
    '''
    state_marker = '[edit]'
    rows = []
    current_state = None

    with open(r'university_towns.txt') as f:
        for line in f:
            if not line.strip():
                # A blank line carries no town; recording it would create an
                # empty RegionName.
                continue
            if state_marker in line:
                # Match the full "[edit]" marker, not the bare substring
                # "edit", so a town such as "Meredith" is not taken for a
                # state header.
                current_state = line.split('[')[0].strip()
                continue
            if current_state is None:
                raise ValueError(
                    'university_towns.txt: town line found before any state header: %r' % line
                )
            rows.append((current_state, line.split('(')[0].strip()))

    df = pd.DataFrame(rows, columns=['State', 'RegionName'])

    # Normalise the combined Champaign/Urbana entry by name rather than by a
    # hard-coded row number, so other inputs are never patched in the wrong
    # place (or silently extended with a bogus row).
    df['RegionName'] = df['RegionName'].str.replace(
        r'^Champaign[^A-Za-z0-9]+Urbana$', 'Champaign Urbana', regex=True
    )

    return df

get_list_of_university_towns()

In [None]:
def get_recession_start():
    '''Returns the year and quarter of the recession start time as a
    string value in a format such as 2005q3.

    Reads 'gdplev.xls' from the current working directory (skipping the
    first 7 rows), keeps the columns pandas labels 'Unnamed: 4' (renamed to
    'Quarter') and 'Unnamed: 5' (renamed to 'GDP'), and discards every row
    before label 212.

    The value returned is the 'Quarter' entry of the first row whose GDP is
    greater than the next row's GDP, which in turn is greater than the GDP
    of the row after that (two consecutive quarter-on-quarter declines).

    Raises:
        ValueError: if the data contains no two consecutive declines.
    '''
    # NOTE(review): 212 is presumably the row of 2000q1 in gdplev.xls, and
    # 'Unnamed: 5' presumably the GDP series intended for the analysis --
    # confirm both against the spreadsheet.
    first_row = 212

    gdp = (
        pd.read_excel('gdplev.xls', skiprows=7)[['Unnamed: 4', 'Unnamed: 5']]
        .rename(columns={'Unnamed: 4': 'Quarter', 'Unnamed: 5': 'GDP'})
        .loc[first_row:]
        .reset_index(drop=True)
    )

    # After reset_index the row labels equal the list positions below.
    levels = gdp['GDP'].tolist()
    for pos in range(len(levels) - 2):
        if levels[pos] > levels[pos + 1] > levels[pos + 2]:
            return gdp.loc[pos, 'Quarter']

    # Previously this case fell through to an UnboundLocalError.
    raise ValueError('no recession found: GDP never declines for two consecutive quarters')

get_recession_start()

In [None]:
def get_recession_end():
    '''Returns the 'Quarter' value of the row at which the first recession ends.

    The GDP table comes from get_GDP_df(), which is defined outside this
    function.
    NOTE(review): the table is assumed to have 'Quarter' and 'GDP' columns
    and a default 0..n-1 integer index -- confirm against get_GDP_df.

    The recession start is the first row whose GDP is followed by two
    consecutive declines. Searching forward from that row, the recession
    ends at the second of the first two consecutive quarter-on-quarter
    increases.

    Raises:
        ValueError: if no recession start is found, or if GDP never rises
            for two consecutive quarters after the start.
    '''
    df = get_GDP_df()
    gdp = df['GDP']
    search_limit = len(df) - 2

    start = None
    for i in range(search_limit):
        if gdp[i] > gdp[i + 1] > gdp[i + 2]:
            start = i
            break
    if start is None:
        # Previously this case fell through to an UnboundLocalError.
        raise ValueError('no recession found: GDP never declines for two consecutive quarters')

    for j in range(start, search_limit):
        if gdp[j] < gdp[j + 1] < gdp[j + 2]:
            # j + 2 is the second consecutive quarter of growth.
            return df.loc[j + 2, 'Quarter']

    raise ValueError('recession end not found: GDP never grows for two consecutive quarters after the start')

#get_recession_end()

In [None]:
def get_recession_bottom():
    '''Returns the year and quarter of the recession bottom time as a
    string value in a format such as 2005q3.

    The GDP table comes from get_GDP_df(), which is defined outside this
    function.
    NOTE(review): the table is assumed to have 'Quarter' and 'GDP' columns
    and a default 0..n-1 integer index -- confirm against get_GDP_df.

    The bottom is the middle row of the first five-row window in which GDP
    falls twice in a row and then rises twice in a row. The first such
    window is used so that the bottom belongs to the same (first) recession
    that the start/end lookups report.

    Raises:
        ValueError: if no such decline-then-recovery pattern exists.
    '''
    df = get_GDP_df()
    gdp = df['GDP']

    for i in range(len(df) - 4):
        falling = gdp[i] > gdp[i + 1] > gdp[i + 2]
        rising = gdp[i + 2] < gdp[i + 3] < gdp[i + 4]
        if falling and rising:
            # Look the quarter up by column name instead of by position
            # within the row, which is deprecated for label-indexed Series.
            return df.loc[i + 2, 'Quarter']

    # Previously this case fell through to an UnboundLocalError.
    raise ValueError('no recession bottom found: no two declines followed by two increases')

#get_recession_bottom()

In [None]:
def convert_housing_data_to_quarters():
    '''Returns the monthly housing data aggregated to quarterly means.

    Reads 'City_Zhvi_AllHomes.csv' from the current working directory and
    drops the column at position 0 and the columns at positions 3-50.
    NOTE(review): those are presumably identifier/metadata columns and the
    months before 2000 -- confirm against the CSV header.

    For every quarter from 2000q1 through 2016q3 a column named like
    '2000q1' is added, holding the row-wise mean of that quarter's monthly
    'YYYY-MM' columns. 2016q3 averages only 2016-07 and 2016-08.

    The 'State' values are translated through the `states` mapping, which
    is defined outside this function, and the result is indexed by
    ["State", "RegionName"].
    '''
    raw = pd.read_csv('City_Zhvi_AllHomes.csv')
    dropped_positions = [0] + list(range(3, 51))
    monthly = raw.drop(raw.columns[dropped_positions], axis=1)

    # The first two remaining columns identify the region.
    key_columns = [monthly.columns[0], monthly.columns[1]]
    quarterly = pd.DataFrame(monthly[key_columns])

    # Last month averaged; quarters past it are truncated or skipped.
    final_month = (2016, 8)
    for year in range(2000, 2017):
        for quarter in (1, 2, 3, 4):
            month_numbers = [
                month
                for month in range(3 * quarter - 2, 3 * quarter + 1)
                if (year, month) <= final_month
            ]
            if not month_numbers:
                continue
            month_columns = ['{}-{:02d}'.format(year, month) for month in month_numbers]
            quarterly['{}q{}'.format(year, quarter)] = monthly[month_columns].mean(axis=1)

    quarterly['State'] = [states[code] for code in quarterly['State']]

    quarterly.set_index(["State", "RegionName"], inplace=True)

    return quarterly

#convert_housing_data_to_quarters()

In [None]:
def run_ttest():
    '''Compares the price decline of university towns and other towns.

    Using the quarterly housing table from
    convert_housing_data_to_quarters(), the quarters returned by
    get_recession_start() and get_recession_bottom(), and the
    (State, RegionName) list from get_list_of_university_towns(), this
    function:

    1. keeps the quarter columns from recession start through recession
       bottom;
    2. computes price_ratio = (start - bottom) / start for every town;
    3. splits towns into university and non-university groups, matching on
       the (State, RegionName) pair so that a same-named town in another
       state is not counted as a university town;
    4. drops rows with a missing value in any retained column;
    5. runs ttest_ind on the two groups' price_ratio values.

    Returns:
        tuple (different, p, lower) where
        different -- True when p < 0.01;
        p         -- the p-value reported by ttest_ind;
        lower     -- 'university town' when that group's mean price_ratio
                     is the smaller one, otherwise 'non-university town'.
    '''
    housing_data = convert_housing_data_to_quarters()

    recession_start = get_recession_start()
    recession_bottom = get_recession_bottom()

    # reset_index() returns a new frame, so the column assignments below do
    # not write into a slice of the original table.
    window = housing_data.loc[:, recession_start:recession_bottom].reset_index()

    window['price_ratio'] = (window[recession_start] - window[recession_bottom]) / window[recession_start]

    towns = get_list_of_university_towns()

    # A set of (State, RegionName) pairs gives exact, constant-time matching.
    uni_pairs = set(zip(towns['State'], towns['RegionName']))
    window['is_uni_town'] = pd.Series(
        [pair in uni_pairs for pair in zip(window['State'], window['RegionName'])],
        index=window.index,
        dtype=bool,
    )

    uni_data = window[window['is_uni_town']].dropna()
    not_uni_data = window[~window['is_uni_town']].dropna()

    p = ttest_ind(uni_data['price_ratio'], not_uni_data['price_ratio'])[1]

    different = p < 0.01

    lower = 'university town' if uni_data['price_ratio'].mean() < not_uni_data['price_ratio'].mean() else 'non-university town'

    return (different, p, lower)

run_ttest()