In [1]:
import pandas as pd

In [2]:
full_df = pd.read_csv('processed/2012-2021.tsv', sep='\t')
full_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 18414 entries, 0 to 18413
Data columns (total 5 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   title      18414 non-null  object
 1   year       18414 non-null  object
 2   value      18414 non-null  object
 3   canton     18414 non-null  object
 4   file_year  18414 non-null  int64 
dtypes: int64(1), object(4)
memory usage: 719.4+ KB


In [3]:
full_df.head()

Unnamed: 0,title,year,value,canton,file_year
0,Residents,2010,611466.0,Aargau,2012
1,Population density per km2,2010,438.0,Aargau,2012
2,Change in the last 10 years in %,2000-2010,1.2,Aargau,2012
3,Through migration,2000-2010,9.5,Aargau,2012
4,Through natural increase,2000-2010,2.8,Aargau,2012


In [4]:
def value_column(df, value, column='title'):
    return df[df[column] == value].copy().sort_values(by='year').reset_index(drop=True)

In [5]:
population_df = full_df[full_df['title'] == 'Residents'].reset_index(drop=True)

In [6]:
population_df.head()

Unnamed: 0,title,year,value,canton,file_year
0,Residents,2010,611466,Aargau,2012
1,Residents,2010,53017,Appenzell,2012
2,Residents,2010,15688,Appenzell,2012
3,Residents,2010,274404,Basel-Landschaft,2012
4,Residents,2010,184950,Basel-Stadt,2012


In [7]:
population_df.sort_values(by=['year'])['year'].value_counts()

year
2010    27
2011    27
2012    27
2013    27
2014    27
2015    27
2016    27
2017    27
2018    27
2019    27
Name: count, dtype: int64

In [8]:
population_df.to_csv('processed/population.tsv', sep='\t', index=False)

In [9]:
population_density_df = full_df[(full_df['title'] == 'Population density per km2') | (full_df['title'] == 'Population density per km²')].reset_index(drop=True).sort_values(by=['year'])

In [10]:
population_density_df['year'].value_counts()

year
2010    27
2011    27
2012    27
2013    27
2014    27
2015    27
2016    27
2017    27
2018    27
2019    27
Name: count, dtype: int64

In [11]:
population_density_df.head()

Unnamed: 0,title,year,value,canton,file_year
0,Population density per km2,2010,438,Aargau,2012
26,Population density per km2,2010,197,Switzerland,2012
25,Population density per km2,2010,827,Zurich,2012
24,Population density per km2,2010,546,Zug,2012
23,Population density per km2,2010,253,Vaud,2012


In [12]:
population_density_df.to_csv('processed/population_density.tsv', sep='\t', index=False)

In [13]:
df_0_19 = value_column(full_df, '0-19 years')
df_20_64 = value_column(full_df, '20-64 years')
df_65_plus = pd.concat([value_column(full_df, '65 years or over'), value_column(full_df, '>64 years')], ignore_index=True).reset_index(drop=True)


In [14]:
len(df_0_19), len(df_20_64), len(df_65_plus)

(270, 270, 270)

In [15]:
# save to tsv
df_0_19.to_csv('processed/age_0_19.tsv', sep='\t', index=False)
df_20_64.to_csv('processed/age_20_64.tsv', sep='\t', index=False)
df_65_plus.to_csv('processed/age_65_plus.tsv', sep='\t', index=False)

In [17]:
urban_population_proportion_df = value_column(full_df, 'Urban population in %')
urban_population_proportion_df['year'].value_counts()

year
2010    27
2011    27
2012    27
2013    27
2014    27
2015    27
2016    27
2017    27
2018    27
2019    27
Name: count, dtype: int64

In [18]:
urban_population_proportion_df.to_csv('processed/urban_population_proportion.tsv', sep='\t', index=False)

In [None]:
marriage_rate = value_column(full_df, 'Crude marriage rate')
marriage_rate['year'].value_counts()

year
2013    27
2014    27
2015    27
2016    27
2017    27
2018    27
2019    27
Name: count, dtype: int64

In [27]:
marriage_rate.to_csv('processed/marriage_rate.tsv', sep='\t', index=False)

In [28]:
divorce_rate = value_column(full_df, 'Crude divorce rate')
divorce_rate['year'].value_counts()

year
2013    27
2014    27
2015    27
2016    27
2017    27
2018    27
2019    27
Name: count, dtype: int64

In [29]:
divorce_rate.to_csv('processed/divorce_rate.tsv', sep='\t', index=False)

In [None]:
birth_rate = value_column(full_df, 'Crude birth rate')
birth_rate['year'].value_counts()

In [31]:
birth_rate.to_csv('processed/birth_rate.tsv', sep='\t', index=False)

In [33]:
mortality_rate = value_column(full_df, 'Crude mortality rate')
mortality_rate['year'].value_counts()

year
2013    27
2014    27
2015    27
2016    27
2017    27
2018    27
2019    27
Name: count, dtype: int64

In [34]:
mortality_rate.to_csv('processed/mortality_rate.tsv', sep='\t', index=False)