### Objective
Review transaction numbers and oil prices, and prepare them for use in a neural network.

## Explore Oil
- find and fill missing dates
- create a new column that describes the CHANGE in price, rather than the price itself

In [1]:
import pandas as pd
from collections import Counter

In [2]:
# Load the raw oil price table (columns: date, dcoilwtico) and display it.
oil_df = pd.read_csv(filepath_or_buffer='data/oil.csv')
oil_df

Unnamed: 0,date,dcoilwtico
0,2013-01-01,
1,2013-01-02,93.14
2,2013-01-03,92.97
3,2013-01-04,93.12
4,2013-01-07,93.20
5,2013-01-08,93.21
6,2013-01-09,93.08
7,2013-01-10,93.81
8,2013-01-11,93.60
9,2013-01-14,94.27


In [3]:
# Are there any null values? Build a boolean mask of the missing cells and
# tally how many oil prices are missing (True) versus present (False).
# (The earlier mid-cell `null_oil_df.describe()` call was dropped: its result
# was never displayed or stored, so it had no effect.)
null_oil_df = oil_df.isnull()
print("\n",Counter(null_oil_df['dcoilwtico']))


 Counter({False: 1175, True: 43})


In [4]:
# Check for duplicate dates: flag every row whose date occurs more than once
# (keep=False marks all occurrences, not just the repeats), then tally the flags.
duplicate_date_flags = oil_df.duplicated(subset='date', keep=False)
Counter(duplicate_date_flags)

Counter({False: 1218})

In [5]:
# The first oil data point is NaN. It seems more reasonable to assume the price
# of oil was the same as on the next day (93.14) than to set it to 0; while it
# is just one data point, it should serve the neural network better than a 0.
# `DataFrame.set_value` was deprecated in pandas 0.21 and removed in 1.0, so the
# label-based scalar accessor `.at` is used to write (and read back) the cell.
oil_df.at[0, 'dcoilwtico'] = 93.14
print(oil_df.at[0, 'dcoilwtico'])

93.14


In [6]:
# The oil data skips some dates, so left-join it onto a table of continuous
# dates; dates that have no oil price come through as NaN.
all_dates = pd.read_csv('all_dates.csv')
oil_df = pd.merge(all_dates, oil_df, how='left', on='date')
oil_df

Unnamed: 0,date,month,day,year,dcoilwtico
0,2013-01-01,1,1,2013,93.14
1,2013-01-02,1,2,2013,93.14
2,2013-01-03,1,3,2013,92.97
3,2013-01-04,1,4,2013,93.12
4,2013-01-05,1,5,2013,
5,2013-01-06,1,6,2013,
6,2013-01-07,1,7,2013,93.20
7,2013-01-08,1,8,2013,93.21
8,2013-01-09,1,9,2013,93.08
9,2013-01-10,1,10,2013,93.81


In [7]:
# Double-check the null values now that the continuous dates were merged in.
null_oil_df = oil_df.isnull()
null_price_counts = Counter(null_oil_df['dcoilwtico'])
print(null_price_counts)

Counter({False: 1176, True: 528})


In [8]:
# Fill the missing oil prices by linear interpolation between the surrounding
# known prices. Only the numeric price column is interpolated: calling
# DataFrame.interpolate on the whole frame also touches the string `date`
# column, and interpolating object-dtype columns is deprecated in newer pandas.
# NOTE(review): assumes month/day/year contain no NaNs of their own — the null
# summary in the next cell should confirm this.
oil_df = oil_df.assign(
    dcoilwtico=oil_df['dcoilwtico'].interpolate(method='linear')
)
oil_df

Unnamed: 0,date,month,day,year,dcoilwtico
0,2013-01-01,1,1,2013,93.140000
1,2013-01-02,1,2,2013,93.140000
2,2013-01-03,1,3,2013,92.970000
3,2013-01-04,1,4,2013,93.120000
4,2013-01-05,1,5,2013,93.146667
5,2013-01-06,1,6,2013,93.173333
6,2013-01-07,1,7,2013,93.200000
7,2013-01-08,1,8,2013,93.210000
8,2013-01-09,1,9,2013,93.080000
9,2013-01-10,1,10,2013,93.810000


In [9]:
# Summarise the null mask per column; a column with no missing values reports
# False as its only (and most frequent) value.
null_oil_df = oil_df.isna()
null_oil_df.describe()

Unnamed: 0,date,month,day,year,dcoilwtico
count,1704,1704,1704,1704,1704
unique,1,1,1,1,1
top,False,False,False,False,False
freq,1704,1704,1704,1704,1704


In [10]:
# Divide oil prices by 100 so their magnitude is closer to that of the
# one-hot encoded features in the other data.
oil_df['oil/100'] = oil_df['dcoilwtico'] / 100
oil_df

Unnamed: 0,date,month,day,year,dcoilwtico,oil/100
0,2013-01-01,1,1,2013,93.140000,0.931400
1,2013-01-02,1,2,2013,93.140000,0.931400
2,2013-01-03,1,3,2013,92.970000,0.929700
3,2013-01-04,1,4,2013,93.120000,0.931200
4,2013-01-05,1,5,2013,93.146667,0.931467
5,2013-01-06,1,6,2013,93.173333,0.931733
6,2013-01-07,1,7,2013,93.200000,0.932000
7,2013-01-08,1,8,2013,93.210000,0.932100
8,2013-01-09,1,9,2013,93.080000,0.930800
9,2013-01-10,1,10,2013,93.810000,0.938100


In [11]:
# Add the price-change column, initialised to 0.0 for every row.
oil_df = oil_df.assign(oil_change=0.0)

In [12]:
# Day-over-day change in the oil price: each row gets its own price minus the
# previous row's price. The first row has no predecessor, so its change is 0.
# Series.diff replaces the original iterrows loop, which wrote each value with
# DataFrame.set_value (deprecated in pandas 0.21, removed in 1.0).
price_change = oil_df['dcoilwtico'].diff()
price_change.iloc[:1] = 0.0  # slice (not [0]) so an empty frame is a no-op
oil_df['oil_change'] = price_change

oil_df

0 93.14


1 93.14
Previous:  93.14 
Current:  93.14
New:  0.0 

2 92.97
Previous:  93.14 
Current:  92.97
New:  0.0 

3 93.12
Previous:  92.97 
Current:  93.12
New:  0.0 

4 93.14666666666668
Previous:  93.12 
Current:  93.14666666666668
New:  0.0 

5 93.17333333333333
Previous:  93.14666666666668 
Current:  93.17333333333333
New:  0.0 

6 93.2
Previous:  93.17333333333333 
Current:  93.2
New:  0.0 

7 93.21
Previous:  93.2 
Current:  93.21
New:  0.0 

8 93.08
Previous:  93.21 
Current:  93.08
New:  0.0 

9 93.81
Previous:  93.08 
Current:  93.81
New:  0.0 

10 93.6
Previous:  93.81 
Current:  93.6
New:  0.0 

11 93.82333333333332
Previous:  93.6 
Current:  93.82333333333332
New:  0.0 

12 94.04666666666667
Previous:  93.82333333333332 
Current:  94.04666666666667
New:  0.0 

13 94.27
Previous:  94.04666666666667 
Current:  94.27
New:  0.0 

14 93.26
Previous:  94.27 
Current:  93.26
New:  0.0 

15 94.28
Previous:  93.26 
Current:  94.28
New:  0.0 

16 95.49
Previous:  94.28 
Current: 

Current:  104.9
New:  0.0 

232 103.93
Previous:  104.9 
Current:  103.93
New:  0.0 

233 104.93
Previous:  103.93 
Current:  104.93
New:  0.0 

234 106.48
Previous:  104.93 
Current:  106.48
New:  0.0 

235 106.28
Previous:  106.48 
Current:  106.28
New:  0.0 

236 106.08
Previous:  106.28 
Current:  106.08
New:  0.0 

237 105.88
Previous:  106.08 
Current:  105.88
New:  0.0 

238 109.11
Previous:  105.88 
Current:  109.11
New:  0.0 

239 110.17
Previous:  109.11 
Current:  110.17
New:  0.0 

240 108.51
Previous:  110.17 
Current:  108.51
New:  0.0 

241 107.98
Previous:  108.51 
Current:  107.98
New:  0.0 

242 108.1525
Previous:  107.98 
Current:  108.1525
New:  0.0 

243 108.325
Previous:  108.1525 
Current:  108.325
New:  0.0 

244 108.4975
Previous:  108.325 
Current:  108.4975
New:  0.0 

245 108.67
Previous:  108.4975 
Current:  108.67
New:  0.0 

246 107.29
Previous:  108.67 
Current:  107.29
New:  0.0 

247 108.5
Previous:  107.29 
Current:  108.5
New:  0.0 

248 110.62
Previ

383 94.235
Previous:  94.0975 
Current:  94.235
New:  0.0 

384 94.3725
Previous:  94.235 
Current:  94.3725
New:  0.0 

385 94.51
Previous:  94.3725 
Current:  94.51
New:  0.0 

386 96.35
Previous:  94.51 
Current:  96.35
New:  0.0 

387 97.23
Previous:  96.35 
Current:  97.23
New:  0.0 

388 96.66
Previous:  97.23 
Current:  96.66
New:  0.0 

389 96.38
Previous:  96.66 
Current:  96.38
New:  0.0 

390 96.1
Previous:  96.38 
Current:  96.1
New:  0.0 

391 95.82
Previous:  96.1 
Current:  95.82
New:  0.0 

392 97.49
Previous:  95.82 
Current:  97.49
New:  0.0 

393 97.34
Previous:  97.49 
Current:  97.34
New:  0.0 

394 98.25
Previous:  97.34 
Current:  98.25
New:  0.0 

395 97.55
Previous:  98.25 
Current:  97.55
New:  0.0 

396 97.17999999999999
Previous:  97.55 
Current:  97.17999999999999
New:  0.0 

397 96.81
Previous:  97.17999999999999 
Current:  96.81
New:  0.0 

398 96.44
Previous:  96.81 
Current:  96.44
New:  0.0 

399 97.24
Previous:  96.44 
Current:  97.24
New:  0.0 

400 

Current:  107.95
New:  0.0 

536 107.57666666666667
Previous:  107.95 
Current:  107.57666666666667
New:  0.0 

537 107.20333333333333
Previous:  107.57666666666667 
Current:  107.20333333333333
New:  0.0 

538 106.83
Previous:  107.20333333333333 
Current:  106.83
New:  0.0 

539 106.64
Previous:  106.83 
Current:  106.64
New:  0.0 

540 107.04
Previous:  106.64 
Current:  107.04
New:  0.0 

541 106.49
Previous:  107.04 
Current:  106.49
New:  0.0 

542 106.46
Previous:  106.49 
Current:  106.46
New:  0.0 

543 106.33
Previous:  106.46 
Current:  106.33
New:  0.0 

544 106.19999999999999
Previous:  106.33 
Current:  106.19999999999999
New:  0.0 

545 106.07
Previous:  106.19999999999999 
Current:  106.07
New:  0.0 

546 106.06
Previous:  106.07 
Current:  106.06
New:  0.0 

547 105.18
Previous:  106.06 
Current:  105.18
New:  0.0 

548 104.76
Previous:  105.18 
Current:  104.76
New:  0.0 

549 104.6175
Previous:  104.76 
Current:  104.6175
New:  0.0 

550 104.475
Previous:  104.6175 



671 78.77
Previous:  79.35666666666667 
Current:  78.77
New:  0.0 

672 77.15
Previous:  78.77 
Current:  77.15
New:  0.0 

673 78.71
Previous:  77.15 
Current:  78.71
New:  0.0 

674 77.87
Previous:  78.71 
Current:  77.87
New:  0.0 

675 78.71
Previous:  77.87 
Current:  78.71
New:  0.0 

676 78.28333333333333
Previous:  78.71 
Current:  78.28333333333333
New:  0.0 

677 77.85666666666667
Previous:  78.28333333333333 
Current:  77.85666666666667
New:  0.0 

678 77.43
Previous:  77.85666666666667 
Current:  77.43
New:  0.0 

679 77.85
Previous:  77.43 
Current:  77.85
New:  0.0 

680 77.16
Previous:  77.85 
Current:  77.16
New:  0.0 

681 74.13
Previous:  77.16 
Current:  74.13
New:  0.0 

682 75.91
Previous:  74.13 
Current:  75.91
New:  0.0 

683 75.82
Previous:  75.91 
Current:  75.82
New:  0.0 

684 75.73
Previous:  75.82 
Current:  75.73
New:  0.0 

685 75.64
Previous:  75.73 
Current:  75.64
New:  0.0 

686 74.55
Previous:  75.64 
Current:  74.55
New:  0.0 

687 74.55
Previous:

Current:  55.56
New:  0.0 

847 57.05
Previous:  55.56 
Current:  57.05
New:  0.0 

848 58.55
Previous:  57.05 
Current:  58.55
New:  0.0 

849 59.62
Previous:  58.55 
Current:  59.62
New:  0.0 

850 59.1
Previous:  59.62 
Current:  59.1
New:  0.0 

851 59.04
Previous:  59.1 
Current:  59.04
New:  0.0 

852 58.980000000000004
Previous:  59.04 
Current:  58.980000000000004
New:  0.0 

853 58.92
Previous:  58.980000000000004 
Current:  58.92
New:  0.0 

854 60.38
Previous:  58.92 
Current:  60.38
New:  0.0 

855 60.93
Previous:  60.38 
Current:  60.93
New:  0.0 

856 58.99
Previous:  60.93 
Current:  58.99
New:  0.0 

857 59.41
Previous:  58.99 
Current:  59.41
New:  0.0 

858 59.349999999999994
Previous:  59.41 
Current:  59.349999999999994
New:  0.0 

859 59.29
Previous:  59.349999999999994 
Current:  59.29
New:  0.0 

860 59.23
Previous:  59.29 
Current:  59.23
New:  0.0 

861 60.72
Previous:  59.23 
Current:  60.72
New:  0.0 

862 60.5
Previous:  60.72 
Current:  60.5
New:  0.0 

863

Current:  48.53
New:  0.0 

1009 47.86
Previous:  48.53 
Current:  47.86
New:  0.0 

1010 49.46
Previous:  47.86 
Current:  49.46
New:  0.0 

1011 49.67
Previous:  49.46 
Current:  49.67
New:  0.0 

1012 48.81
Previous:  49.67 
Current:  48.81
New:  0.0 

1013 47.95
Previous:  48.81 
Current:  47.95
New:  0.0 

1014 47.09
Previous:  47.95 
Current:  47.09
New:  0.0 

1015 46.7
Previous:  47.09 
Current:  46.7
New:  0.0 

1016 46.63
Previous:  46.7 
Current:  46.63
New:  0.0 

1017 46.38
Previous:  46.63 
Current:  46.38
New:  0.0 

1018 47.3
Previous:  46.38 
Current:  47.3
New:  0.0 

1019 46.836666666666666
Previous:  47.3 
Current:  46.836666666666666
New:  0.0 

1020 46.37333333333333
Previous:  46.836666666666666 
Current:  46.37333333333333
New:  0.0 

1021 45.91
Previous:  46.37333333333333 
Current:  45.91
New:  0.0 

1022 45.84
Previous:  45.91 
Current:  45.84
New:  0.0 

1023 45.22
Previous:  45.84 
Current:  45.22
New:  0.0 

1024 44.9
Previous:  45.22 
Current:  44.9
New: 

Previous:  38.14 
Current:  38.1025
New:  0.0 

1180 38.065
Previous:  38.1025 
Current:  38.065
New:  0.0 

1181 38.0275
Previous:  38.065 
Current:  38.0275
New:  0.0 

1182 37.99
Previous:  38.0275 
Current:  37.99
New:  0.0 

1183 36.91
Previous:  37.99 
Current:  36.91
New:  0.0 

1184 36.91
Previous:  36.91 
Current:  36.91
New:  0.0 

1185 36.94
Previous:  36.91 
Current:  36.94
New:  0.0 

1186 35.36
Previous:  36.94 
Current:  35.36
New:  0.0 

1187 35.00666666666667
Previous:  35.36 
Current:  35.00666666666667
New:  0.0 

1188 34.65333333333333
Previous:  35.00666666666667 
Current:  34.65333333333333
New:  0.0 

1189 34.3
Previous:  34.65333333333333 
Current:  34.3
New:  0.0 

1190 34.52
Previous:  34.3 
Current:  34.52
New:  0.0 

1191 37.74
Previous:  34.52 
Current:  37.74
New:  0.0 

1192 37.3
Previous:  37.74 
Current:  37.3
New:  0.0 

1193 39.74
Previous:  37.3 
Current:  39.74
New:  0.0 

1194 39.980000000000004
Previous:  39.74 
Current:  39.980000000000004
New:  

Current:  46.14666666666667
New:  0.0 

1350 46.28
Previous:  46.14666666666667 
Current:  46.28
New:  0.0 

1351 44.91
Previous:  46.28 
Current:  44.91
New:  0.0 

1352 43.62
Previous:  44.91 
Current:  43.62
New:  0.0 

1353 43.85
Previous:  43.62 
Current:  43.85
New:  0.0 

1354 43.04
Previous:  43.85 
Current:  43.04
New:  0.0 

1355 43.14
Previous:  43.04 
Current:  43.14
New:  0.0 

1356 43.24
Previous:  43.14 
Current:  43.24
New:  0.0 

1357 43.34
Previous:  43.24 
Current:  43.34
New:  0.0 

1358 43.85
Previous:  43.34 
Current:  43.85
New:  0.0 

1359 45.33
Previous:  43.85 
Current:  45.33
New:  0.0 

1360 46.1
Previous:  45.33 
Current:  46.1
New:  0.0 

1361 44.36
Previous:  46.1 
Current:  44.36
New:  0.0 

1362 44.77333333333333
Previous:  44.36 
Current:  44.77333333333333
New:  0.0 

1363 45.18666666666667
Previous:  44.77333333333333 
Current:  45.18666666666667
New:  0.0 

1364 45.6
Previous:  45.18666666666667 
Current:  45.6
New:  0.0 

1365 44.65
Previous:  45.6

Previous:  53.25333333333334 
Current:  52.96
New:  0.0 

1505 53.21
Previous:  52.96 
Current:  53.21
New:  0.0 

1506 53.11
Previous:  53.21 
Current:  53.11
New:  0.0 

1507 53.41
Previous:  53.11 
Current:  53.41
New:  0.0 

1508 53.41
Previous:  53.41 
Current:  53.41
New:  0.0 

1509 53.5625
Previous:  53.41 
Current:  53.5625
New:  0.0 

1510 53.715
Previous:  53.5625 
Current:  53.715
New:  0.0 

1511 53.8675
Previous:  53.715 
Current:  53.8675
New:  0.0 

1512 54.02
Previous:  53.8675 
Current:  54.02
New:  0.0 

1513 53.61
Previous:  54.02 
Current:  53.61
New:  0.0 

1514 54.48
Previous:  53.61 
Current:  54.48
New:  0.0 

1515 53.99
Previous:  54.48 
Current:  53.99
New:  0.0 

1516 54.00666666666667
Previous:  53.99 
Current:  54.00666666666667
New:  0.0 

1517 54.02333333333333
Previous:  54.00666666666667 
Current:  54.02333333333333
New:  0.0 

1518 54.04
Previous:  54.02333333333333 
Current:  54.04
New:  0.0 

1519 54.0
Previous:  54.04 
Current:  54.0
New:  0.0 

15

New:  0.0 

1697 47.65
Previous:  47.24 
Current:  47.65
New:  0.0 

1698 47.233333333333334
Previous:  47.65 
Current:  47.233333333333334
New:  0.0 

1699 46.81666666666666
Previous:  47.233333333333334 
Current:  46.81666666666666
New:  0.0 

1700 46.4
Previous:  46.81666666666666 
Current:  46.4
New:  0.0 

1701 46.46
Previous:  46.4 
Current:  46.46
New:  0.0 

1702 45.96
Previous:  46.46 
Current:  45.96
New:  0.0 

1703 47.26
Previous:  45.96 
Current:  47.26
New:  0.0 



In [13]:
# Display the frame, which now includes the oil_change column.
oil_df

Unnamed: 0,date,month,day,year,dcoilwtico,oil/100,oil_change
0,2013-01-01,1,1,2013,93.140000,0.931400,0.000000
1,2013-01-02,1,2,2013,93.140000,0.931400,0.000000
2,2013-01-03,1,3,2013,92.970000,0.929700,-0.170000
3,2013-01-04,1,4,2013,93.120000,0.931200,0.150000
4,2013-01-05,1,5,2013,93.146667,0.931467,0.026667
5,2013-01-06,1,6,2013,93.173333,0.931733,0.026667
6,2013-01-07,1,7,2013,93.200000,0.932000,0.026667
7,2013-01-08,1,8,2013,93.210000,0.932100,0.010000
8,2013-01-09,1,9,2013,93.080000,0.930800,-0.130000
9,2013-01-10,1,10,2013,93.810000,0.938100,0.730000


In [14]:
# Save the processed oil data to disk, leaving out the index column.
oil_df.to_csv(path_or_buf='modified_data/new_oil.csv', index=False)

In [15]:
# Read the saved oil data back in and display it.
oil_df = pd.read_csv(filepath_or_buffer='modified_data/new_oil.csv')
oil_df

Unnamed: 0,date,month,day,year,dcoilwtico,oil/100,oil_change
0,2013-01-01,1,1,2013,93.140000,0.931400,0.000000
1,2013-01-02,1,2,2013,93.140000,0.931400,0.000000
2,2013-01-03,1,3,2013,92.970000,0.929700,-0.170000
3,2013-01-04,1,4,2013,93.120000,0.931200,0.150000
4,2013-01-05,1,5,2013,93.146667,0.931467,0.026667
5,2013-01-06,1,6,2013,93.173333,0.931733,0.026667
6,2013-01-07,1,7,2013,93.200000,0.932000,0.026667
7,2013-01-08,1,8,2013,93.210000,0.932100,0.010000
8,2013-01-09,1,9,2013,93.080000,0.930800,-0.130000
9,2013-01-10,1,10,2013,93.810000,0.938100,0.730000


In [16]:
# Load the date tables for the training and test sets.
train_dates, test_dates = map(
    pd.read_csv,
    ('modified_data/train_dates.csv', 'modified_data/test_dates.csv'),
)

In [17]:
# Attach the oil columns to every training date (left join on date, month,
# day and year, so every row of train_dates is kept).
date_keys = ['date', 'month', 'day', 'year']
train_oil = pd.merge(train_dates, oil_df, how='left', on=date_keys)
train_oil

Unnamed: 0,date,month,day,year,dcoilwtico,oil/100,oil_change
0,2013-01-01,1,1,2013,93.140000,0.931400,0.000000
1,2013-01-02,1,2,2013,93.140000,0.931400,0.000000
2,2013-01-03,1,3,2013,92.970000,0.929700,-0.170000
3,2013-01-04,1,4,2013,93.120000,0.931200,0.150000
4,2013-01-05,1,5,2013,93.146667,0.931467,0.026667
5,2013-01-06,1,6,2013,93.173333,0.931733,0.026667
6,2013-01-07,1,7,2013,93.200000,0.932000,0.026667
7,2013-01-08,1,8,2013,93.210000,0.932100,0.010000
8,2013-01-09,1,9,2013,93.080000,0.930800,-0.130000
9,2013-01-10,1,10,2013,93.810000,0.938100,0.730000


In [18]:
# Attach the oil columns to every test date (left join on date, month,
# day and year, so every row of test_dates is kept).
date_keys = ['date', 'month', 'day', 'year']
test_oil = pd.merge(test_dates, oil_df, how='left', on=date_keys)
test_oil

Unnamed: 0,date,month,day,year,dcoilwtico,oil/100,oil_change
0,2017-08-16,8,16,2017,46.8,0.468,-0.77
1,2017-08-17,8,17,2017,47.07,0.4707,0.27
2,2017-08-18,8,18,2017,48.59,0.4859,1.52
3,2017-08-19,8,19,2017,48.19,0.4819,-0.4
4,2017-08-20,8,20,2017,47.79,0.4779,-0.4
5,2017-08-21,8,21,2017,47.39,0.4739,-0.4
6,2017-08-22,8,22,2017,47.65,0.4765,0.26
7,2017-08-23,8,23,2017,48.45,0.4845,0.8
8,2017-08-24,8,24,2017,47.24,0.4724,-1.21
9,2017-08-25,8,25,2017,47.65,0.4765,0.41


In [19]:
# Write the train and test oil tables to disk, leaving out the index column.
for oil_frame, out_path in (
    (train_oil, 'modified_data/train_oil.csv'),
    (test_oil, 'modified_data/test_oil.csv'),
):
    oil_frame.to_csv(out_path, index=False)