In [1]:
import pandas as pd

In [3]:
df = pd.read_csv('stockholm_temperature.csv')

### Exploring the data

In [4]:
df.head()

Unnamed: 0,year;jan;feb;mar;apr;may;jun;jul;aug;sep;oct;nov;dec
0,2017;-1.0;-0.7;2.4;4.0;10.1;14.1;16.4;16.2;12....
1,2018;-0.6;-4.1;-2.5;6.2;14.8;16.1;21.5;18.5;13...
2,2019;-2.5;1.0;1.8;6.5;9.6;17.3;16.7;17.3;12.8;...
3,2020;3.3;2.0;2.7;6.2;8.8;17.4;16.0;18.6;13.8;9...
4,1980;-4.7;-6.2;-3.1;4.8;7.8;15.6;16.8;14.6;12....


#### It seems like every row is a string, each values we are interested in are separated by ";"

In [5]:
df.columns

Index(['year;jan;feb;mar;apr;may;jun;jul;aug;sep;oct;nov;dec'], dtype='object')

#### Let's try to split it to get an array that contains columns

In [7]:
columns = df.columns[0].split(";")
columns

['year',
 'jan',
 'feb',
 'mar',
 'apr',
 'may',
 'jun',
 'jul',
 'aug',
 'sep',
 'oct',
 'nov',
 'dec']

#### Spliting each row so we can concatenate them with columns later

In [21]:
rows=[]
for row in df.values:
    row = row[0].split(";")
    rows.append(row)


#### Concatenating the new dataframe

In [20]:
new_df = pd.DataFrame(data=rows, columns=columns)
new_df.head()

Unnamed: 0,year,jan,feb,mar,apr,may,jun,jul,aug,sep,oct,nov,dec
0,2018,-0.6,-4.1,-2.5,6.2,14.8,16.1,21.5,18.5,13.9,8.4,4.0,0.2
1,2019,-2.5,1.0,1.8,6.5,9.6,17.3,16.7,17.3,12.8,6.9,3.6,2.4
2,2020,3.3,2.0,2.7,6.2,8.8,17.4,16.0,18.6,13.8,9.0,6.4,3.4
3,1980,-4.7,-6.2,-3.1,4.8,7.8,15.6,16.8,14.6,12.8,5.7,-0.0,-0.8
4,1981,-4.0,-2.7,-2.2,3.9,10.8,12.6,16.4,14.6,11.7,6.5,1.1,-5.7


### We dealt with the weird shape of the data now, it's a proper dataframe that we can use 

#### Let's set the index to the year column

In [23]:
df = new_df

df.set_index('year', inplace=True)
df.head()

Unnamed: 0_level_0,jan,feb,mar,apr,may,jun,jul,aug,sep,oct,nov,dec
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2018,-0.6,-4.1,-2.5,6.2,14.8,16.1,21.5,18.5,13.9,8.4,4.0,0.2
2019,-2.5,1.0,1.8,6.5,9.6,17.3,16.7,17.3,12.8,6.9,3.6,2.4
2020,3.3,2.0,2.7,6.2,8.8,17.4,16.0,18.6,13.8,9.0,6.4,3.4
1980,-4.7,-6.2,-3.1,4.8,7.8,15.6,16.8,14.6,12.8,5.7,-0.0,-0.8
1981,-4.0,-2.7,-2.2,3.9,10.8,12.6,16.4,14.6,11.7,6.5,1.1,-5.7


In [28]:
#replacing the months with the corresponding numbers
months = ['jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul', 'aug', 'sep', 'oct', 'nov', 'dec']
replacements = ['01', '02', '03', '04', '05', '06', '07', '08', '09', '10', '11', '12']

for month, replacement in zip(months, replacements):
    df.columns = df.columns.str.replace(month, replacement)
df.head()

Unnamed: 0_level_0,01,02,03,04,05,06,07,08,09,10,11,12
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2018,-0.6,-4.1,-2.5,6.2,14.8,16.1,21.5,18.5,13.9,8.4,4.0,0.2
2019,-2.5,1.0,1.8,6.5,9.6,17.3,16.7,17.3,12.8,6.9,3.6,2.4
2020,3.3,2.0,2.7,6.2,8.8,17.4,16.0,18.6,13.8,9.0,6.4,3.4
1980,-4.7,-6.2,-3.1,4.8,7.8,15.6,16.8,14.6,12.8,5.7,-0.0,-0.8
1981,-4.0,-2.7,-2.2,3.9,10.8,12.6,16.4,14.6,11.7,6.5,1.1,-5.7


In [32]:
# making each month a row next to its year so it becomes a time series
df = df.stack()
# making the rows to the format "YYYY-MM" instead
df.index = df.index.map(lambda x: x[0] + '-' + x[1])
df.head()

2018-01    -0.6
2018-02    -4.1
2018-03    -2.5
2018-04     6.2
2018-05    14.8
dtype: object

In [33]:
df

2018-01    -0.6
2018-02    -4.1
2018-03    -2.5
2018-04     6.2
2018-05    14.8
           ... 
2015-08    17.5
2015-09    13.2
2015-10     7.9
2015-11     4.6
2015-12     3.5
Length: 468, dtype: object

### Now we really have a time series XD

In [34]:
# exporting it as a time series
df.to_csv('stockholm_temperature_time_series.csv')