# Master table creation
In this notebook we merge all the previously downloaded data with the sentiment-analysis results computed for the different data sources into a single master table, which is then saved as a CSV file.

In [1]:
# Load the needed packages:
import sys
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import datetime
%matplotlib inline

## 1.0 Load data and create a master table

In [2]:
# Read the five source CSV files into DataFrames.
# poloniex and twitter are read with pandas' default integer index:
poloniex = pd.read_csv("Data/poloniex_data.csv")
twitter = pd.read_csv("Data/twitter_agg_ddb.csv")
# The remaining files are read with their first column as the index:
ggtrends, reddit, forum = (
    pd.read_csv(csv_path, index_col=0)
    for csv_path in (
        "Data/google_trends.csv",
        "Data/allreddit_nlp.csv",
        "Data/merit_compound.csv",
    )
)

In [3]:
# Preprocessing: rename columns so that the reddit, twitter and forum frames
# all expose a 'date' column and carry descriptive, source-specific names.
# REDDIT:
reddit = reddit.rename(
    columns={
        'date_notime': 'date',
        'count_comments': 'Reddit Comments (#)',
        'mean_sa': 'Reddit Average SA',
    }
)

# TWITTER: besides renaming, reduce the timestamp to a calendar-day string.
# NOTE(review): the format below expects values like '20170502 00:00:00' —
# confirm against the raw CSV.
# NOTE(review): this cell rebinds `twitter`; re-running it without reloading
# the CSV would try to parse the already formatted date strings again.
twitter = twitter.rename(
    columns={'created_at': 'date', 'Average SA': 'Twitter Average SA'}
).assign(
    date=lambda tw: pd.to_datetime(tw['date'], format='%Y%m%d %H:%M:%S')
    .dt.date
    .apply(str)
)

# FORUM:
forum = forum.rename(
    columns={
        'newdate': 'date',
        'compound': 'Forum SA Merit',
        'merit_compound': 'Forum SA Merit (weighted)',
    }
)

In [4]:
# Merge the datasets into one Master Table, joining on the 'date' column.
# Google Trends, forum and Reddit are inner-joined, so only dates present in
# all of them (and in the Poloniex data) are kept.
master = pd.merge(poloniex, ggtrends, how='inner', on='date')
# Twitter is left-joined: dates without tweets stay in the table with NaN in
# the Twitter columns. An outer join would additionally admit Twitter-only
# dates, whose Poloniex / Google Trends columns are all NaN; any of those that
# survive the inner joins below would end up as rows without market data.
master = pd.merge(master, twitter, how='left', on='date')
master = pd.merge(master, forum, how='inner', on='date')
master = pd.merge(master, reddit, how='inner', on='date')

In [5]:
# Create the UP/DOWN class: True when 'return_day+1' is strictly positive.
# NOTE(review): a missing 'return_day+1' compares as False, so such rows get
# the same label as a non-positive return — confirm this is intended.
master['invest'] = master['return_day+1'].gt(0)

In [6]:
# Save the master table (row index included) as a CSV file:
master.to_csv('Data/20180920_mastertable.csv')

In [7]:
# Preview the first five rows of the master table:
master.head()

Unnamed: 0,date,return_day+1,close,volume,volatility_14,googletrends_buy_sell,Tweets (#),Active Influencers (#),Twitter Average SA,Forum SA Merit,Forum SA Merit (weighted),Reddit Comments (#),Reddit Average SA,invest
0,2017-05-01,0.020854,1530.0,20038400.0,0.020241,0.857143,,,,0.363063,170.581733,1262,0.100726,True
1,2017-05-02,0.034389,1561.907,11571050.0,0.020173,0.857143,105.0,25.0,0.156365,0.3249,163.314061,1600,0.108506,True
2,2017-05-03,-0.007255,1615.62,15060860.0,0.019967,0.904762,86.0,30.0,0.207976,0.359885,177.341913,1353,0.110652,False
3,2017-05-04,-0.03772,1603.898572,26329240.0,0.020404,0.851852,111.0,27.0,0.152614,0.380796,190.054485,1766,0.102435,False
4,2017-05-05,0.03551,1543.4,32397180.0,0.024767,0.833333,75.0,28.0,0.117646,0.381558,158.822512,1689,0.099547,True
