In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from scipy import stats
import seaborn as sns
%matplotlib inline

In [13]:
# Load the ESS practice data and give the terse survey codes readable names.
# Column descriptions: https://thinkful-ed.github.io/data-201-resources/ESS_practice_data/ESS_codebook.html
ess_column_names = {
    'cntry': 'country',
    'idno': 'id_number',
    'year': 'year',  # 6 = 2012, 7 = 2014
    'tvtot': 'weekly_tv_time',  # 0 = No time at all, 7 = More than 3 hours
    'ppltrst': 'people_trust_0_10',  # 0 = You can't be too careful, 10 = Most people can be trusted
    'pplfair': 'people_fair_0_10',
    'pplhlp': 'people_help_0_10',
    'happy': 'happy_0_10',
    'sclmeet': 'social_meeting_frequency_1_7',  # 1 = Never, 7 = Every day
    'sclact': 'social_activities_vs_peers_1_5',  # 1 = Much less than most, 5 = Much more than most
    'gndr': 'gender',  # 1 = male, 2 = female
    'agea': 'age',
    'partner': 'has_partner',  # 1 = lives with partner, 2 = does not
}

# Read and rename in one chain so re-running the cell always starts from the raw file.
df = (
    pd.read_csv('https://raw.githubusercontent.com/Thinkful-Ed/data-201-resources/master/ESS_practice_data/ESSdata_Thinkful.csv')
    .rename(columns=ess_column_names)
)
df.head(5)

Unnamed: 0,country,id_number,year,weekly_tv_time,people_trust_0_10,people_fair_0_10,people_help_0_10,happy_0_10,social_meeting_frequency_1_7,social_activities_vs_peers_1_5,gender,age,has_partner
0,CH,5.0,6,3.0,3.0,10.0,5.0,8.0,5.0,4.0,2.0,60.0,1.0
1,CH,25.0,6,6.0,5.0,7.0,5.0,9.0,3.0,2.0,2.0,59.0,1.0
2,CH,26.0,6,1.0,8.0,8.0,8.0,7.0,6.0,3.0,1.0,24.0,2.0
3,CH,28.0,6,4.0,6.0,6.0,7.0,10.0,6.0,2.0,2.0,64.0,1.0
4,CH,29.0,6,5.0,6.0,7.0,5.0,8.0,7.0,2.0,2.0,55.0,1.0


In [59]:
# 1. Did people become less trusting from 2012 to 2014? Compute results for each country in the sample.

# Mean trust score (0-10 scale): first per survey year overall, then per year and country.
# The year column is coded 6 = 2012 and 7 = 2014.
for trust_grouping in (['year'], ['year', 'country']):
    print(df.groupby(trust_grouping)['people_trust_0_10'].mean())

year
6    5.563098
7    5.556720
Name: people_trust_0_10, dtype: float64
year  country
6     CH         5.677878
      CZ         4.362519
      DE         5.214286
      ES         5.114592
      NO         6.649315
      SE         6.058499
7     CH         5.751617
      CZ         4.424658
      DE         5.357143
      ES         4.895128
      NO         6.598630
      SE         6.257709
Name: people_trust_0_10, dtype: float64


In [60]:
# 2. Did people become happier from 2012 to 2014? Compute results for each country in the sample.
# Mean happiness score (0-10 scale): first per survey year overall, then per year and country.
# The year column is coded 6 = 2012 and 7 = 2014.
for happy_grouping in (['year'], ['year', 'country']):
    print(df.groupby(happy_grouping)['happy_0_10'].mean())

year
6    7.723573
7    7.665734
Name: happy_0_10, dtype: float64
year  country
6     CH         8.088312
      CZ         6.770898
      DE         7.428571
      ES         7.548680
      NO         8.251719
      SE         7.907387
7     CH         8.116429
      CZ         6.914110
      DE         7.857143
      ES         7.419967
      NO         7.915185
      SE         7.946961
Name: happy_0_10, dtype: float64


In [66]:
# 3. Who reported watching more TV in 2012, men or women?
# Codes: year 6 = 2012; gender 1 = male, 2 = female.
# Keep only the 2012 rows, then average the TV-time score within each gender.
(
    df.loc[df['year'] == 6]
    .groupby('gender')['weekly_tv_time']
    .mean()
)

gender
1.0    3.901906
2.0    3.944393
Name: weekly_tv_time, dtype: float64

In [68]:
# 4. Who was more likely to believe people were fair in 2012, people living with a partner or people living alone?
# Codes: year 6 = 2012; has_partner 1 = lives with partner, 2 = does not.
# NOTE(review): code 2 means "does not live with a partner", which is used here as a stand-in for "living alone".
is_2012_wave = df['year'].eq(6)
df[is_2012_wave].groupby('has_partner')['people_fair_0_10'].mean()

has_partner
1.0    6.080736
2.0    5.856965
Name: people_fair_0_10, dtype: float64

In [71]:
# 5. Pick three or four of the countries in the sample and compare how often people met socially in 2014.
# Are there differences, and if so, which countries stand out?
# Year code 7 = 2014; the meeting-frequency scale runs from 1 (never) to 7 (every day).
responses_2014 = df.loc[df['year'] == 7]
mean_gathering_by_country = (
    responses_2014
    .groupby('country')['social_meeting_frequency_1_7']
    .mean()
)
print(mean_gathering_by_country)

# Report the extremes so the outlying countries are easy to spot in the table above.
for extreme_label, extreme_value in (
    ("max: ", mean_gathering_by_country.max()),
    ("min: ", mean_gathering_by_country.min()),
):
    print(extreme_label, extreme_value)

# Observation: DE has the lowest mean and SE the highest.

country
CH    5.160622
CZ    4.445802
DE    4.428571
ES    5.260116
NO    5.302326
SE    5.426211
Name: social_meeting_frequency_1_7, dtype: float64
max:  5.42621145374
min:  4.42857142857


In [82]:
# 6. Pick three or four of the countries in the sample and compare how often people took part in social activities
# relative to others their age, in 2014. Are there differences, and if so, which countries stand out?

# Bucket respondents by age so countries can be compared within age groups.
# pd.cut uses right-closed intervals by default: (0, 25], (25, 50], (50, 75], (75, 100], (100, 125].
bins = [0, 25, 50, 75, 100, 125]
labels = ["Kids", "Young adults", "Adults", "Elder", "Very old"]
df['age_buckets'] = pd.cut(df['age'], bins, labels=labels)

# age_buckets is categorical. On pandas versions where `observed` defaults to False, grouping by it
# emits a row (NaN mean) for every country/age-group pair, including pairs with no respondents.
# observed=True keeps only the combinations that actually occur in the data.
# Year code 7 = 2014.
(
    df[df['year'] == 7]
    .groupby(['country', 'age_buckets'], observed=True)['social_activities_vs_peers_1_5']
    .mean()
    .round(2)
)

country  age_buckets 
CH       Kids            2.83
         Young adults    2.79
         Adults          2.78
         Elder           2.63
CZ       Kids            2.94
         Young adults    2.67
         Adults          2.70
         Elder           2.50
DE       Young adults    3.00
         Adults          2.75
         Elder           2.00
ES       Kids            2.62
         Young adults    2.73
         Adults          2.61
         Elder           2.04
         Very old        3.00
NO       Kids            2.95
         Young adults    2.81
         Adults          2.85
         Elder           2.90
SE       Kids            2.88
         Young adults    2.89
         Adults          2.89
         Elder           2.78
         Very old        2.00
Name: social_activities_vs_peers_1_5, dtype: float64