# Predicting Air temperature for 2019 for station PTIM4

We are using data from years 2011 to 2018 to predict the air temperature for year 2019


### Importing Libraries

In [1]:
import pandas as pd
import numpy as np
from glob import glob

### Using glob to read multiple files at once. The output is a list of dataframes

In [2]:
filenames = glob('/Users/shrav/Downloads/Ice Cover/PTIM4/ptim4*.txt')
dataframes = [pd.read_csv(f, delim_whitespace = True, header = 0, skiprows = 1) for f in filenames]

### Concatenating the list of dataframes to form a single dataframe

In [3]:
df_ice_cover = pd.concat(dataframes, sort = False)

### Checking the head and tail of the dataframe to make sure data from 2011 to 2019 is read without defects

In [4]:
df_ice_cover.head()

Unnamed: 0,#yr,mo,dy,hr,mn,degT,m/s,m/s.1,m,sec,sec.1,degT.1,hPa,degC,degC.1,degC.2,mi,ft
0,2011,1,1,0,0,311,1.6,2.0,99.0,99.0,99.0,999,1007.6,3.3,999.0,999.0,99.0,99.0
1,2011,1,1,0,6,266,1.2,1.7,99.0,99.0,99.0,999,1007.9,3.1,999.0,999.0,99.0,99.0
2,2011,1,1,0,12,242,1.5,2.0,99.0,99.0,99.0,999,1008.0,3.1,999.0,999.0,99.0,99.0
3,2011,1,1,0,18,265,1.6,2.4,99.0,99.0,99.0,999,1008.2,3.2,999.0,999.0,99.0,99.0
4,2011,1,1,0,24,272,1.8,2.7,99.0,99.0,99.0,999,1008.2,3.2,999.0,999.0,99.0,99.0


In [5]:
df_ice_cover.tail()

Unnamed: 0,#yr,mo,dy,hr,mn,degT,m/s,m/s.1,m,sec,sec.1,degT.1,hPa,degC,degC.1,degC.2,mi,ft
84798,2019,12,31,23,30,284,9.3,12.1,99.0,99.0,99.0,999,998.4,-0.4,999.0,-0.4,99.0,99.0
84799,2019,12,31,23,36,281,9.4,11.1,99.0,99.0,99.0,999,998.6,-0.5,999.0,-0.5,99.0,99.0
84800,2019,12,31,23,42,287,8.5,11.6,99.0,99.0,99.0,999,998.5,-0.6,999.0,-1.0,99.0,99.0
84801,2019,12,31,23,48,287,8.8,11.3,99.0,99.0,99.0,999,998.6,-0.7,999.0,-1.0,99.0,99.0
84802,2019,12,31,23,54,287,9.1,11.4,99.0,99.0,99.0,999,998.7,-0.6,999.0,-1.3,99.0,99.0


In [6]:
##### Checking the dimensions of the dataframe #########
df_ice_cover.shape

(769386, 18)

In [7]:
############### Displaying the columns #################
df_ice_cover.columns

Index(['#yr', 'mo', 'dy', 'hr', 'mn', 'degT', 'm/s', 'm/s.1', 'm', 'sec',
       'sec.1', 'degT.1', 'hPa', 'degC', 'degC.1', 'degC.2', 'mi', 'ft'],
      dtype='object')

In [8]:
columns = ["YYYY","MM","DD","hh","mm","WDIR","WSPD","GST","WVHT","DPD","APD","MWD","BAR","ATMP","WTMP","DEWP","VIS","TIDE"]

In [9]:
############## Assigning new column names ##############
df_ice_cover.columns = columns

In [11]:
########### Checking the datatypes of features #########
df_ice_cover.dtypes

YYYY      int64
MM        int64
DD        int64
hh        int64
mm        int64
WDIR      int64
WSPD    float64
GST     float64
WVHT    float64
DPD     float64
APD     float64
MWD       int64
BAR     float64
ATMP    float64
WTMP    float64
DEWP    float64
VIS     float64
TIDE    float64
dtype: object

In [12]:
## Verifying if the dataframe has the appropriate header##
df_ice_cover.head(10)

Unnamed: 0,YYYY,MM,DD,hh,mm,WDIR,WSPD,GST,WVHT,DPD,APD,MWD,BAR,ATMP,WTMP,DEWP,VIS,TIDE
0,2011,1,1,0,0,311,1.6,2.0,99.0,99.0,99.0,999,1007.6,3.3,999.0,999.0,99.0,99.0
1,2011,1,1,0,6,266,1.2,1.7,99.0,99.0,99.0,999,1007.9,3.1,999.0,999.0,99.0,99.0
2,2011,1,1,0,12,242,1.5,2.0,99.0,99.0,99.0,999,1008.0,3.1,999.0,999.0,99.0,99.0
3,2011,1,1,0,18,265,1.6,2.4,99.0,99.0,99.0,999,1008.2,3.2,999.0,999.0,99.0,99.0
4,2011,1,1,0,24,272,1.8,2.7,99.0,99.0,99.0,999,1008.2,3.2,999.0,999.0,99.0,99.0
5,2011,1,1,0,30,265,2.2,2.9,99.0,99.0,99.0,999,1008.4,3.0,999.0,999.0,99.0,99.0
6,2011,1,1,0,36,266,1.9,3.1,99.0,99.0,99.0,999,1008.4,3.0,999.0,999.0,99.0,99.0
7,2011,1,1,0,42,272,1.4,2.8,99.0,99.0,99.0,999,1008.3,3.1,999.0,999.0,99.0,99.0
8,2011,1,1,0,48,273,1.3,1.9,99.0,99.0,99.0,999,1008.4,2.9,999.0,999.0,99.0,99.0
9,2011,1,1,0,54,286,1.6,1.8,99.0,99.0,99.0,999,1008.4,3.1,999.0,999.0,99.0,99.0


### 