In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import scipy.stats as stats

In [None]:
# Load the dataset used throughout section 1 from a path relative to the
# notebook's working directory.
# NOTE(review): the file name suggests the QoG standard cross-section, Jan 2018 release — confirm.
df = pd.read_csv('data/qog_std_cs_jan18.csv')

## 1. Código de la Presentación
### 1.1. Correlación

In [None]:
# Scatter plot with a fitted regression line: wvs_godbel on x, wvs_godimp on y.
# x and y are passed by keyword because seaborn 0.12+ accepts only `data`
# positionally; the positional form raises TypeError there. The keyword form
# also works on older seaborn releases.
sns.regplot(x='wvs_godbel', y='wvs_godimp', data=df)

In [None]:
# Pairwise correlation matrix (pandas default method) for the two columns
# plotted in the previous cell.
df.loc[:, ['wvs_godbel', 'wvs_godimp']].corr()

In [None]:
# Scatter plot with a fitted regression line: wvs_demimp on x, wvs_satlif on y.
# x and y are passed by keyword because seaborn 0.12+ accepts only `data`
# positionally; the positional form raises TypeError there.
sns.regplot(x='wvs_demimp', y='wvs_satlif', data=df)

In [None]:
# Pairwise correlation matrix (pandas default method) for the two columns
# plotted in the previous cell.
df.loc[:, ['wvs_demimp', 'wvs_satlif']].corr()

### 1.2. Gráficos

In [None]:
# Pairwise plot grid restricted to the two columns of interest.
sns.pairplot(data=df.loc[:, ['ccp_market', 'wdi_gini']])

In [None]:
# One facet per ccp_market value (wrapping after 3 columns), each showing the
# distribution of wdi_gini. FacetGrid.map returns the grid itself, so the
# chained call leaves `g` bound to the same FacetGrid as before.
# NOTE(review): sns.distplot is deprecated in recent seaborn releases; on a
# newer install consider sns.histplot(..., kde=True, stat='density') instead.
g = sns.FacetGrid(data=df, col='ccp_market', col_wrap=3).map(sns.distplot, 'wdi_gini')

In [None]:
# Draw one histogram of wdi_gini per observed (non-missing) ccp_market value.
#
# Changes from the pyplot-state version:
#   * the grid width follows the number of categories instead of a hard-coded
#     3 columns, which raised as soon as a 4th category showed up;
#   * missing wdi_gini values are dropped before binning, so the result does
#     not depend on how the installed matplotlib/numpy treat NaN;
#   * every panel is titled and labelled so the figure can be read on its own.
market_levels = df['ccp_market'].dropna().unique()

# squeeze=False keeps `axes` two-dimensional even with a single category;
# max(..., 1) avoids asking matplotlib for a zero-column grid.
fig, axes = plt.subplots(1, max(len(market_levels), 1), squeeze=False)

for i, e in enumerate(market_levels):
    print(i, e)
    ax = axes[0, i]
    ax.hist(df.loc[df['ccp_market'] == e, 'wdi_gini'].dropna())
    ax.set_title('ccp_market = {}'.format(e))
    ax.set_xlabel('wdi_gini')

### 1.3. Test de Hipótesis

In [None]:
# Sample mean of wdi_gini over the rows where it is present.
df['wdi_gini'].dropna().mean()

In [None]:
# One-sample t-test: is the mean of the observed wdi_gini values equal to 38.5?
stats.ttest_1samp(df['wdi_gini'].dropna(), popmean=38.5)

In [None]:
# Two-sample t-test on wvs_godbel: rows with ht_region == 3 against all other
# rows. Note that `!= 3` also keeps rows whose ht_region is missing.
stats.ttest_ind(df.loc[df['ht_region'] == 3, 'wvs_godbel'].dropna(),
                df.loc[df['ht_region'] != 3, 'wvs_godbel'].dropna())

In [None]:
# Mean of wvs_godbel for the rows with ht_region == 3.
df.loc[df['ht_region'] == 3, 'wvs_godbel'].mean()

In [None]:
# Mean of wvs_godbel for every row whose ht_region is not 3 (rows with a
# missing ht_region are included by the `!=` comparison).
df.loc[df['ht_region'] != 3, 'wvs_godbel'].mean()

## 2. Tips para los Desafíos

In [None]:
# Note kept as a bare string literal; as the cell's only expression it is
# echoed back as the cell output. English: "Challenge 1: generate binary
# variables for each categorical variable."
'''
Desafio 1: Generar variables binarias para cada variable categórica.


'''

In [35]:
# Load the Credit data. This rebinds `df`, so the frame loaded at the top of
# the notebook is no longer reachable under that name from here on.
df = pd.read_csv('data/Credit.csv')

In [36]:
# Exact string match on the raw Gender values. The recorded output has a header
# but no rows, while the stripped comparison in the following cell does match,
# so the stored labels evidently carry surrounding whitespace.
df[df.Gender == 'Male']

Unnamed: 0.1,Unnamed: 0,Income,Limit,Rating,Cards,Age,Education,Gender,Student,Married,Ethnicity,Balance


In [41]:
# Same filter after trimming surrounding whitespace from the Gender labels, so
# padded values compare equal to 'Male'.
df.loc[df['Gender'].str.strip() == 'Male']

Unnamed: 0.1,Unnamed: 0,Income,Limit,Rating,Cards,Age,Education,Gender,Student,Married,Ethnicity,Balance
0,1,14.891,3606,283,2,34,11,Male,No,Yes,Caucasian,333
2,3,104.593,7075,514,4,71,11,Male,No,No,Asian,580
4,5,55.882,4897,357,2,68,16,Male,No,Yes,Caucasian,331
5,6,80.180,8047,569,4,77,10,Male,No,No,Caucasian,1151
7,8,71.408,7114,512,2,87,9,Male,No,No,Asian,872
10,11,63.095,8117,589,4,30,14,Male,No,Yes,Caucasian,1407
11,12,15.045,1311,138,3,64,16,Male,No,No,Caucasian,0
13,14,43.682,6922,511,1,49,9,Male,No,Yes,Caucasian,1081
19,20,42.079,6626,479,2,44,9,Male,No,No,Asian,1048
22,23,20.103,2631,213,3,61,10,Male,No,Yes,African American,0


## 3. Binarización de Variables

In [None]:
import statsmodels.formula.api as smf
import pandas as pd

In [None]:
# Re-read the Credit data so this section starts from the file's contents
# rather than from whatever state `df` was left in by earlier cells.
df = pd.read_csv('data/Credit.csv')

In [None]:
# Show each column's dtype, which identifies the object (string) columns that
# need encoding before they can be used as numeric regressors.
df.dtypes

In [None]:
# Formula-API regression of Balance on Student; the formula interface is given
# the raw Student column and handles its encoding itself.
m1 = smf.ols(formula='Balance ~ Student', data=df)
fitted_m1 = m1.fit()

In [None]:
# Regression table for the formula-based model (m1).
fitted_m1.summary()

In [None]:
import statsmodels.api as sm

# Build plain arrays for the array-based statsmodels API.
Y = df.Balance.values

# Encode Student as 0/1 by hand. Labels are stripped first because string
# columns in this CSV can carry surrounding whitespace (see the Gender filter
# earlier in the notebook), and Series.map silently turns any label it does not
# know into NaN. Such NaNs would only surface later as a confusing fit, so fail
# here with the offending labels instead.
student_flag = df.Student.str.strip().map({'No': 0, 'Yes': 1})
if student_flag.isna().any():
    raise ValueError('Unexpected Student labels: {}'.format(
        sorted(df.Student[student_flag.isna()].astype(str).unique())))
X = student_flag.values

In [None]:
# Add an intercept column to X; the array-based sm.OLS does not add one itself.
X = sm.add_constant(X)

In [None]:
# Array-API counterpart of m1: Balance (endog) regressed on the constant plus
# the hand-encoded Student dummy (exog).
m2 = sm.OLS(endog=Y, exog=X)
fitted_m2 = m2.fit()

In [None]:
# Regression table for the array-based model (m2), for comparison with m1.
fitted_m2.summary()