# Part 1.4 Merging all the data into one csv file

# Capstone Index

    
+    [Part 1.1 Data Munging Tesla Stock Data](./Stock_dayofweek_10_14_2018.ipynb)
+    [Part 1.2 Data Munging Elon Musk Twitter Data](./Tweets_dayofweek_10_14_2018.ipynb)
+    [Part 1.3 Scrapping the SEC](./Web_Scraper_2_10_16_2018_clean.ipynb)
+    [Part 1.4 Merging all the data into one csv file](./Merging_the_data_3_10_18_2018_clean.ipynb)
+    [Part 2 Running models](./Modeling_5_10_19_2018 .ipynb)
+    [Part 2.1 Running more models](./Running_more_models_10_19_2018.ipynb)

## Imports

In [1]:
import pandas as pd 
import numpy as np

%matplotlib inline

## Loading the dataframes

In [2]:
Stock = pd.read_csv('../Clean Code/Stock_dayofweek_10_14_2018')
Tweets = pd.read_csv('../Clean Code/Tweets_dayofweek_10_14_2018')

In [3]:
Stock.head()

Unnamed: 0,date,close,volume,open,high,low,day_of_week
0,2018-10-08,250.56,13371180.0,264.52,267.7599,249.0,1
1,2018-10-05,261.95,17900710.0,274.65,274.88,260.0,5
2,2018-10-04,281.83,9638885.0,293.95,294.0,277.67,4
3,2018-10-03,294.8,7982272.0,303.33,304.6,291.57,3
4,2018-10-02,301.02,11699690.0,313.95,316.84,299.15,2


In [4]:
Tweets.head()

Unnamed: 0,id,created_at,text,day_of_week
0,849636868052275200,2017-04-05 14:56:29,b'And so the robots spared humanity ... https:...,3
1,848988730585096192,2017-04-03 20:01:01,"b""@ForIn2020 @waltmossberg @mims @defcon_5 Exa...",1
2,848943072423497728,2017-04-03 16:59:35,"b'@waltmossberg @mims @defcon_5 Et tu, Walt?'",1
3,848935705057280001,2017-04-03 16:30:19,b'Stormy weather in Shortville ...',1
4,848416049573658624,2017-04-02 06:05:23,"b""@DaveLeeBBC @verge Coal is dying due to nat ...",7


## Making datetime column 'year'

In [5]:
# Making Tweets['created_at'] a datetime formate
Tweets['created_at'] = pd.to_datetime(Tweets['created_at'], format='%Y-%m-%d %H:%M:%S')

In [6]:
# Creating adding my main sorting feature to my Tweets dataframe so I can merge it with my Stock the dataframe
Tweets['year'] =  Tweets['created_at'].dt.strftime('%Y/%m/%d')

In [7]:
# do I need this?
Tweets['year'] = Tweets['year'].astype(str)

## Adding Tweets per day to the 'Stock' dataframe

In [8]:
# Creating new Column to overwrite data
Tweets['times_a_day'] = Tweets['day_of_week']

In [9]:
temp = Tweets.groupby('year').agg(np.size)['id']

In [10]:
temp.index[0]

'2010/06/04'

In [11]:
for i in range(len(temp)):
    for j in range(len(Tweets)):
   # print(temp.index[i], Tweets['year'][j])
        if temp.index[i] == Tweets.loc[j, 'year']:
            Tweets.loc[j, 'times_a_day'] = temp[i]

In [12]:
Tweets.head(10)

Unnamed: 0,id,created_at,text,day_of_week,year,times_a_day
0,849636868052275200,2017-04-05 14:56:29,b'And so the robots spared humanity ... https:...,3,2017/04/05,1
1,848988730585096192,2017-04-03 20:01:01,"b""@ForIn2020 @waltmossberg @mims @defcon_5 Exa...",1,2017/04/03,3
2,848943072423497728,2017-04-03 16:59:35,"b'@waltmossberg @mims @defcon_5 Et tu, Walt?'",1,2017/04/03,3
3,848935705057280001,2017-04-03 16:30:19,b'Stormy weather in Shortville ...',1,2017/04/03,3
4,848416049573658624,2017-04-02 06:05:23,"b""@DaveLeeBBC @verge Coal is dying due to nat ...",7,2017/04/02,4
5,848415731502923777,2017-04-02 06:04:07,"b""@Lexxxzis It's just a helicopter in helicopt...",7,2017/04/02,4
6,848415356263702528,2017-04-02 06:02:38,"b""@verge It won't matter""",7,2017/04/02,4
7,848398971139629057,2017-04-02 04:57:31,b'@SuperCoolCube Pretty good',7,2017/04/02,4
8,848244577521647616,2017-04-01 18:44:01,"b""Why did we waste so much time developing sil...",6,2017/04/01,5
9,848243350993895424,2017-04-01 18:39:09,b'Technology breakthrough: turns out chemtrail...,6,2017/04/01,5


## Removing Duplicates

In [13]:
# Number of times Elon Musk tweeted more than one time a day on a trading day from mid 2010 to mid 2017
Tweets['year'].duplicated().sum()

1921

In [14]:
Tweets[Tweets['year'].duplicated(keep=False)].head()

Unnamed: 0,id,created_at,text,day_of_week,year,times_a_day
1,848988730585096192,2017-04-03 20:01:01,"b""@ForIn2020 @waltmossberg @mims @defcon_5 Exa...",1,2017/04/03,3
2,848943072423497728,2017-04-03 16:59:35,"b'@waltmossberg @mims @defcon_5 Et tu, Walt?'",1,2017/04/03,3
3,848935705057280001,2017-04-03 16:30:19,b'Stormy weather in Shortville ...',1,2017/04/03,3
4,848416049573658624,2017-04-02 06:05:23,"b""@DaveLeeBBC @verge Coal is dying due to nat ...",7,2017/04/02,4
5,848415731502923777,2017-04-02 06:04:07,"b""@Lexxxzis It's just a helicopter in helicopt...",7,2017/04/02,4


In [15]:
Tweets.shape

(2819, 6)

In [16]:
# Dropping the Duplicates
Tweets.drop_duplicates('year', keep = 'first', inplace = True)

In [17]:
Tweets.head()

Unnamed: 0,id,created_at,text,day_of_week,year,times_a_day
0,849636868052275200,2017-04-05 14:56:29,b'And so the robots spared humanity ... https:...,3,2017/04/05,1
1,848988730585096192,2017-04-03 20:01:01,"b""@ForIn2020 @waltmossberg @mims @defcon_5 Exa...",1,2017/04/03,3
4,848416049573658624,2017-04-02 06:05:23,"b""@DaveLeeBBC @verge Coal is dying due to nat ...",7,2017/04/02,4
8,848244577521647616,2017-04-01 18:44:01,"b""Why did we waste so much time developing sil...",6,2017/04/01,5
13,847958571895619584,2017-03-31 23:47:32,b'@BadAstronomer We can def bring it back like...,5,2017/03/31,11


In [18]:
Tweets.shape

(898, 6)

## Saving

In [19]:
pd.DataFrame(Tweets).to_csv('Tweets_final_10_18_2018', index = False)

## Adding year column to the Stock dateframe

In [20]:
Stock.head()

Unnamed: 0,date,close,volume,open,high,low,day_of_week
0,2018-10-08,250.56,13371180.0,264.52,267.7599,249.0,1
1,2018-10-05,261.95,17900710.0,274.65,274.88,260.0,5
2,2018-10-04,281.83,9638885.0,293.95,294.0,277.67,4
3,2018-10-03,294.8,7982272.0,303.33,304.6,291.57,3
4,2018-10-02,301.02,11699690.0,313.95,316.84,299.15,2


In [21]:
# Converting to datetime, pd.to_datetime
Stock['date'] =  pd.to_datetime(Stock['date'], format='%Y-%m-%d')

In [22]:
Stock['year'] =  Stock['date'].dt.strftime('%Y/%m/%d')

In [23]:
Stock.head()

Unnamed: 0,date,close,volume,open,high,low,day_of_week,year
0,2018-10-08,250.56,13371180.0,264.52,267.7599,249.0,1,2018/10/08
1,2018-10-05,261.95,17900710.0,274.65,274.88,260.0,5,2018/10/05
2,2018-10-04,281.83,9638885.0,293.95,294.0,277.67,4,2018/10/04
3,2018-10-03,294.8,7982272.0,303.33,304.6,291.57,3,2018/10/03
4,2018-10-02,301.02,11699690.0,313.95,316.84,299.15,2,2018/10/02


## Saving

In [24]:
pd.DataFrame(Stock).to_csv('Stock_final_10_18_2018', index = False)

## Merging the dataframes

In [25]:
Stock = pd.read_csv('./Stock_final_10_18_2018')
Tweets = pd.read_csv('./Tweets_final_10_18_2018')

In [26]:
Stock.head()

Unnamed: 0,date,close,volume,open,high,low,day_of_week,year
0,2018-10-08,250.56,13371180.0,264.52,267.7599,249.0,1,2018/10/08
1,2018-10-05,261.95,17900710.0,274.65,274.88,260.0,5,2018/10/05
2,2018-10-04,281.83,9638885.0,293.95,294.0,277.67,4,2018/10/04
3,2018-10-03,294.8,7982272.0,303.33,304.6,291.57,3,2018/10/03
4,2018-10-02,301.02,11699690.0,313.95,316.84,299.15,2,2018/10/02


In [27]:
Stock.isna().sum()

date           0
close          0
volume         0
open           0
high           0
low            0
day_of_week    0
year           0
dtype: int64

In [28]:
Tweets.head()

Unnamed: 0,id,created_at,text,day_of_week,year,times_a_day
0,849636868052275200,2017-04-05 14:56:29,b'And so the robots spared humanity ... https:...,3,2017/04/05,1
1,848988730585096192,2017-04-03 20:01:01,"b""@ForIn2020 @waltmossberg @mims @defcon_5 Exa...",1,2017/04/03,3
2,848416049573658624,2017-04-02 06:05:23,"b""@DaveLeeBBC @verge Coal is dying due to nat ...",7,2017/04/02,4
3,848244577521647616,2017-04-01 18:44:01,"b""Why did we waste so much time developing sil...",6,2017/04/01,5
4,847958571895619584,2017-03-31 23:47:32,b'@BadAstronomer We can def bring it back like...,5,2017/03/31,11


In [29]:
Stock.shape

(2085, 8)

In [30]:
Tweets.shape

(898, 6)

In [31]:
# was doing this before, it is wrong
#merged = pd.merge(Stock, Tweets, on='year', how='outer')

In [32]:
# was doing this before, it is wrong
merged = pd.merge(Stock, Tweets, on='year', how='outer')

In [33]:
# Merging the Tweet dataframe to my Stock dataframe by Stock ['year'], as Stock dataframe has all the "Y" data form my model
#merged = pd.concat([Stock, Tweets], axis = 1, join_axes, ignore_index=True)#join = 'outer'

In [34]:
merged

Unnamed: 0,date,close,volume,open,high,low,day_of_week_x,year,id,created_at,text,day_of_week_y,times_a_day
0,2018-10-08,250.56,13371180.0,264.52,267.7599,249.0000,1.0,2018/10/08,,,,,
1,2018-10-05,261.95,17900710.0,274.65,274.8800,260.0000,5.0,2018/10/05,,,,,
2,2018-10-04,281.83,9638885.0,293.95,294.0000,277.6700,4.0,2018/10/04,,,,,
3,2018-10-03,294.80,7982272.0,303.33,304.6000,291.5700,3.0,2018/10/03,,,,,
4,2018-10-02,301.02,11699690.0,313.95,316.8400,299.1500,2.0,2018/10/02,,,,,
5,2018-10-01,310.70,21714210.0,305.77,311.4400,301.0500,1.0,2018/10/01,,,,,
6,2018-09-28,264.77,33597290.0,270.26,278.0000,260.5550,5.0,2018/09/28,,,,,
7,2018-09-27,307.52,7337760.0,312.90,314.9600,306.9100,4.0,2018/09/27,,,,,
8,2018-09-26,309.58,7835863.0,301.91,313.8900,301.1093,3.0,2018/09/26,,,,,
9,2018-09-25,300.99,4472287.0,300.00,304.6000,296.5000,2.0,2018/09/25,,,,,


## Filling in null values, ie trading days that Elon Musk did not tweeted on

In [35]:
merged.isnull().sum()

date              261
close             261
volume            261
open              261
high              261
low               261
day_of_week_x     261
year                0
id               1448
created_at       1448
text             1448
day_of_week_y    1448
times_a_day      1448
dtype: int64

In [36]:
merged = merged.loc[merged['date'].dropna().index,:]

In [37]:
merged.isnull().sum()

date                0
close               0
volume              0
open                0
high                0
low                 0
day_of_week_x       0
year                0
id               1448
created_at       1448
text             1448
day_of_week_y    1448
times_a_day      1448
dtype: int64

## Filling in null values for trading days without Elon Musk

In [38]:
merged.fillna(0, inplace = True)

In [39]:
merged.isnull().sum()

date             0
close            0
volume           0
open             0
high             0
low              0
day_of_week_x    0
year             0
id               0
created_at       0
text             0
day_of_week_y    0
times_a_day      0
dtype: int64

## Improving the column names

In [40]:
merged.day_of_week_x.value_counts()

3.0     428
2.0     427
4.0     421
5.0     419
1.0     389
23.0      1
Name: day_of_week_x, dtype: int64

In [41]:
merged_2 = merged

In [42]:
merged_2 = merged_2.rename(columns={"date": "stock_date", "close": "stock_close", "volume" : "stock_volume", "open" : "stock_open", "high" : "stock_high", "low" : "stock_low", "day_of_week_x" : "stock_day_of_week", "year" : "stock_year"})

In [43]:
merged_2 = merged_2.rename(columns={"id": "tweet_id", "created_at": "tweet_created_at", "text" : "tweet_text", "day_of_week_y" : "tweet_day_of_week", "times_a_day" : "tweet_times_a_day"})

In [44]:
merged_2.head()

Unnamed: 0,stock_date,stock_close,stock_volume,stock_open,stock_high,stock_low,stock_day_of_week,stock_year,tweet_id,tweet_created_at,tweet_text,tweet_day_of_week,tweet_times_a_day
0,2018-10-08,250.56,13371180.0,264.52,267.7599,249.0,1.0,2018/10/08,0.0,0,0,0.0,0.0
1,2018-10-05,261.95,17900710.0,274.65,274.88,260.0,5.0,2018/10/05,0.0,0,0,0.0,0.0
2,2018-10-04,281.83,9638885.0,293.95,294.0,277.67,4.0,2018/10/04,0.0,0,0,0.0,0.0
3,2018-10-03,294.8,7982272.0,303.33,304.6,291.57,3.0,2018/10/03,0.0,0,0,0.0,0.0
4,2018-10-02,301.02,11699690.0,313.95,316.84,299.15,2.0,2018/10/02,0.0,0,0,0.0,0.0


## Saving

In [45]:
pd.DataFrame(merged_2).to_csv('merged_10_18_2018', index = False)

## Munging the SEC scrapper

In [46]:
sec = pd.read_csv('../Web_Scraping/Web_Scraper_2_10_16_2018_clean.csv')

## Making column name standardized again

In [47]:
sec.columns

Index(['Filings', 'Date'], dtype='object')

In [48]:
sec.rename(columns={ "Filings" : "filings"}, inplace = True)

### Making 'year' column

In [49]:
sec.head()

Unnamed: 0,filings,Date
0,8-K,2018-10-17
1,SC 13G/A,2018-10-10
2,8-K,2018-10-02
3,8-K,2018-10-01
4,8-K,2018-09-07


In [50]:
# Making Tweets['created_at'] a datetime formate
sec['Date'] = pd.to_datetime(sec['Date'], format='%Y-%m-%d')

In [51]:
sec['Date'] =  sec['Date'].dt.strftime('%Y/%m/%d')

In [52]:
sec['year'] = sec['Date'].astype(str)

In [53]:
sec.head()

Unnamed: 0,filings,Date,year
0,8-K,2018/10/17,2018/10/17
1,SC 13G/A,2018/10/10,2018/10/10
2,8-K,2018/10/02,2018/10/02
3,8-K,2018/10/01,2018/10/01
4,8-K,2018/09/07,2018/09/07


### Deleting an extra and confusing column

In [54]:
del sec['Date']

## Dropping the Duplicates

In [55]:
# Looking at all the duplicate dates, where the same type of thing were filled more than once on the same day
sec[sec.duplicated(keep=False)]

Unnamed: 0,filings,year
10,8-K,2018/07/02
11,8-K,2018/07/02
33,SC 13G/A,2018/02/14
34,SC 13G/A,2018/02/14
80,424B5,2017/03/17
81,424B5,2017/03/17
84,424B5,2017/03/15
85,424B5,2017/03/15
101,424B5,2016/11/21
102,424B5,2016/11/21


In [56]:
sec.shape

(320, 2)

In [57]:
# Dropping all the "extra" rows where same type of thing for filled more than once on the same day
# Might come back and makes these an extra feature type
sec.drop_duplicates(keep = 'first', inplace = True)

In [58]:
sec.shape

(303, 2)

In [59]:
sec.duplicated().sum()

0

In [60]:
sec.head()

Unnamed: 0,filings,year
0,8-K,2018/10/17
1,SC 13G/A,2018/10/10
2,8-K,2018/10/02
3,8-K,2018/10/01
4,8-K,2018/09/07


In [61]:
# Looking at all the duplicate dates, where two different things were filled more than once on the same day
sec[sec['year'].duplicated(keep=False)]

Unnamed: 0,filings,year
14,SD,2018/05/29
15,DEFA14A,2018/05/29
16,PX14A6G,2018/05/29
23,DEFA14A,2018/04/26
24,DEF 14A,2018/04/26
31,S-8,2018/02/23
32,10-K,2018/02/23
38,8-K,2018/02/08
39,CT ORDER,2018/02/08
40,DEF 14A,2018/02/08


In [62]:
sec.duplicated('year').sum()

57

In [63]:
# Dropping all the duplicate dates, where two different things were filled more than once on the same day
# Might come back and makes these an extra feature type
sec.drop_duplicates('year', keep = 'first', inplace = True)

In [64]:
sec.shape

(246, 2)

## Saving

In [65]:
pd.DataFrame(sec).to_csv('sec_10_18_2018', index = False)

## Adding in the SEC scrapper

In [66]:
M2 = pd.read_csv('./merged_10_18_2018')
SEC = pd.read_csv('./sec_10_18_2018')

In [67]:
SEC.count()

filings    246
year       246
dtype: int64

In [68]:
SEC.head(5)

Unnamed: 0,filings,year
0,8-K,2018/10/17
1,SC 13G/A,2018/10/10
2,8-K,2018/10/02
3,8-K,2018/10/01
4,8-K,2018/09/07


In [69]:
M2.shape

(2085, 13)

In [70]:
# because I changed the name above...probably should have not done that
M2 = M2.rename(columns={"stock_year" : "year"})

In [71]:
M2

Unnamed: 0,stock_date,stock_close,stock_volume,stock_open,stock_high,stock_low,stock_day_of_week,year,tweet_id,tweet_created_at,tweet_text,tweet_day_of_week,tweet_times_a_day
0,2018-10-08,250.56,13371180.0,264.520,267.7599,249.0000,1.0,2018/10/08,0.0,0,0,0.0,0.0
1,2018-10-05,261.95,17900710.0,274.650,274.8800,260.0000,5.0,2018/10/05,0.0,0,0,0.0,0.0
2,2018-10-04,281.83,9638885.0,293.950,294.0000,277.6700,4.0,2018/10/04,0.0,0,0,0.0,0.0
3,2018-10-03,294.80,7982272.0,303.330,304.6000,291.5700,3.0,2018/10/03,0.0,0,0,0.0,0.0
4,2018-10-02,301.02,11699690.0,313.950,316.8400,299.1500,2.0,2018/10/02,0.0,0,0,0.0,0.0
5,2018-10-01,310.70,21714210.0,305.770,311.4400,301.0500,1.0,2018/10/01,0.0,0,0,0.0,0.0
6,2018-09-28,264.77,33597290.0,270.260,278.0000,260.5550,5.0,2018/09/28,0.0,0,0,0.0,0.0
7,2018-09-27,307.52,7337760.0,312.900,314.9600,306.9100,4.0,2018/09/27,0.0,0,0,0.0,0.0
8,2018-09-26,309.58,7835863.0,301.910,313.8900,301.1093,3.0,2018/09/26,0.0,0,0,0.0,0.0
9,2018-09-25,300.99,4472287.0,300.000,304.6000,296.5000,2.0,2018/09/25,0.0,0,0,0.0,0.0


In [72]:
# This is wrong and what I was doing before
# Merging on the 'left' ie M2 here so I only merge filing data for dates in my main dataframe range
#M3 = pd.merge(M2, SEC, on='year', how='left')

In [73]:
# Merging the SEC dataframe to my M3 dataframe by M3 ['year'], as M3 dataframe has all the "Y" data form my model
M3 = pd.concat([M2, SEC], axis = 1, join = 'outer')

In [74]:
M3.shape

(2085, 15)

## Filling in null values, ie trading days that did not have SEC filings

In [75]:
M3

Unnamed: 0,stock_date,stock_close,stock_volume,stock_open,stock_high,stock_low,stock_day_of_week,year,tweet_id,tweet_created_at,tweet_text,tweet_day_of_week,tweet_times_a_day,filings,year.1
0,2018-10-08,250.56,13371180.0,264.520,267.7599,249.0000,1.0,2018/10/08,0.0,0,0,0.0,0.0,8-K,2018/10/17
1,2018-10-05,261.95,17900710.0,274.650,274.8800,260.0000,5.0,2018/10/05,0.0,0,0,0.0,0.0,SC 13G/A,2018/10/10
2,2018-10-04,281.83,9638885.0,293.950,294.0000,277.6700,4.0,2018/10/04,0.0,0,0,0.0,0.0,8-K,2018/10/02
3,2018-10-03,294.80,7982272.0,303.330,304.6000,291.5700,3.0,2018/10/03,0.0,0,0,0.0,0.0,8-K,2018/10/01
4,2018-10-02,301.02,11699690.0,313.950,316.8400,299.1500,2.0,2018/10/02,0.0,0,0,0.0,0.0,8-K,2018/09/07
5,2018-10-01,310.70,21714210.0,305.770,311.4400,301.0500,1.0,2018/10/01,0.0,0,0,0.0,0.0,8-K,2018/08/21
6,2018-09-28,264.77,33597290.0,270.260,278.0000,260.5550,5.0,2018/09/28,0.0,0,0,0.0,0.0,8-K,2018/08/14
7,2018-09-27,307.52,7337760.0,312.900,314.9600,306.9100,4.0,2018/09/27,0.0,0,0,0.0,0.0,10-Q,2018/08/06
8,2018-09-26,309.58,7835863.0,301.910,313.8900,301.1093,3.0,2018/09/26,0.0,0,0,0.0,0.0,8-K,2018/08/01
9,2018-09-25,300.99,4472287.0,300.000,304.6000,296.5000,2.0,2018/09/25,0.0,0,0,0.0,0.0,8-K,2018/07/30


In [76]:
M3.isnull().sum()

stock_date              0
stock_close             0
stock_volume            0
stock_open              0
stock_high              0
stock_low               0
stock_day_of_week       0
year                    0
tweet_id                0
tweet_created_at        0
tweet_text              0
tweet_day_of_week       0
tweet_times_a_day       0
filings              1839
year                 1839
dtype: int64

In [77]:
# Filling in the empty filings values on the days there were no SEC filings

#Why is this not working?
#M3['filings'].fillna('none', inplace = True)

In [78]:
M3.fillna('none', inplace = True)

In [79]:
M3.isna().sum()

stock_date           0
stock_close          0
stock_volume         0
stock_open           0
stock_high           0
stock_low            0
stock_day_of_week    0
year                 0
tweet_id             0
tweet_created_at     0
tweet_text           0
tweet_day_of_week    0
tweet_times_a_day    0
filings              0
year                 0
dtype: int64

In [80]:
M3.shape

(2085, 15)

In [81]:
M3

Unnamed: 0,stock_date,stock_close,stock_volume,stock_open,stock_high,stock_low,stock_day_of_week,year,tweet_id,tweet_created_at,tweet_text,tweet_day_of_week,tweet_times_a_day,filings,year.1
0,2018-10-08,250.56,13371180.0,264.520,267.7599,249.0000,1.0,2018/10/08,0.0,0,0,0.0,0.0,8-K,2018/10/17
1,2018-10-05,261.95,17900710.0,274.650,274.8800,260.0000,5.0,2018/10/05,0.0,0,0,0.0,0.0,SC 13G/A,2018/10/10
2,2018-10-04,281.83,9638885.0,293.950,294.0000,277.6700,4.0,2018/10/04,0.0,0,0,0.0,0.0,8-K,2018/10/02
3,2018-10-03,294.80,7982272.0,303.330,304.6000,291.5700,3.0,2018/10/03,0.0,0,0,0.0,0.0,8-K,2018/10/01
4,2018-10-02,301.02,11699690.0,313.950,316.8400,299.1500,2.0,2018/10/02,0.0,0,0,0.0,0.0,8-K,2018/09/07
5,2018-10-01,310.70,21714210.0,305.770,311.4400,301.0500,1.0,2018/10/01,0.0,0,0,0.0,0.0,8-K,2018/08/21
6,2018-09-28,264.77,33597290.0,270.260,278.0000,260.5550,5.0,2018/09/28,0.0,0,0,0.0,0.0,8-K,2018/08/14
7,2018-09-27,307.52,7337760.0,312.900,314.9600,306.9100,4.0,2018/09/27,0.0,0,0,0.0,0.0,10-Q,2018/08/06
8,2018-09-26,309.58,7835863.0,301.910,313.8900,301.1093,3.0,2018/09/26,0.0,0,0,0.0,0.0,8-K,2018/08/01
9,2018-09-25,300.99,4472287.0,300.000,304.6000,296.5000,2.0,2018/09/25,0.0,0,0,0.0,0.0,8-K,2018/07/30


## Saving

In [82]:
pd.DataFrame(M3).to_csv('Merging_the_data_3_10_18_2018_clean', index = False)