# Tesla Stock Market Prediction Based on Elon Musk Tweet Interaction

In [1]:
import pandas as pd
from pandas import DataFrame
import matplotlib.pyplot as plt

In [2]:
# Both input CSVs are resolved relative to the notebook's working directory.
tesla_csv_path, tweets_csv_path = "TSLA.csv", "TweetsElonMusk.csv"

# Tesla stock price history and Elon Musk's tweets, one DataFrame each.
tesla_stock_data = pd.read_csv(tesla_csv_path)
elon_tweets_data = pd.read_csv(tweets_csv_path)

## Describing Datasets

### Tesla Data

In [3]:
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric stock columns.
tesla_stock_data.describe()

Unnamed: 0,Open,High,Low,Close,Adj Close,Volume
count,2956.0,2956.0,2956.0,2956.0,2956.0,2956.0
mean,138.691296,141.771603,135.425953,138.762183,138.762183,31314490.0
std,250.044839,255.863239,243.774157,250.123115,250.123115,27983830.0
min,3.228,3.326,2.996,3.16,3.16,592500.0
25%,19.627,20.402,19.1275,19.615,19.615,13102880.0
50%,46.656999,47.487001,45.820002,46.545,46.545,24886800.0
75%,68.057001,69.3575,66.911501,68.103998,68.103998,39738750.0
max,1234.410034,1243.48999,1217.0,1229.910034,1229.910034,304694000.0


### Elon Musk Data

In [4]:
# Summary statistics of the numeric tweet columns. Columns whose count is 0
# (e.g. place, near, geo) hold no values, so their statistics show as NaN.
elon_tweets_data.describe()

Unnamed: 0,id,conversation_id,timezone,user_id,place,replies_count,retweets_count,likes_count,video,near,geo,source,user_rt_id,user_rt,retweet_id,retweet_date,translate,trans_src,trans_dest
count,12562.0,12562.0,12562.0,12562.0,0.0,12562.0,12562.0,12562.0,12562.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
mean,1.049919e+18,1.04749e+18,300.0,44196397.0,,679.370005,2256.727193,21470.06,0.063047,,,,,,,,,,
std,2.761981e+17,2.783202e+17,0.0,0.0,,2453.853763,9793.382524,67277.87,0.243058,,,,,,,,,,
min,15434730000.0,1659576000.0,300.0,44196397.0,,0.0,0.0,2.0,0.0,,,,,,,,,,
25%,9.444958e+17,9.413883e+17,300.0,44196397.0,,45.0,45.0,879.0,0.0,,,,,,,,,,
50%,1.117568e+18,1.117117e+18,300.0,44196397.0,,113.0,144.5,2439.5,0.0,,,,,,,,,,
75%,1.255896e+18,1.254922e+18,300.0,44196397.0,,373.0,847.75,11282.5,0.0,,,,,,,,,,
max,1.383296e+18,1.383296e+18,300.0,44196397.0,,61349.0,361461.0,1593425.0,1.0,,,,,,,,,,


## Combine Datasets and Refine

In [5]:
# Keep only the columns the analysis uses. Selecting with [[...]] raises a
# KeyError when a column is missing, whereas DataFrame(frame, columns=[...])
# would silently create an all-NaN column for it.
refined_elon = elon_tweets_data[['replies_count', 'retweets_count', 'likes_count', 'date']].copy()

# Rename the stock frame's 'Date' column to 'date' so that both frames expose
# the same key column name.
refined_tesla_stock = tesla_stock_data[['High', 'Low', 'Date']].rename(columns={'Date': 'date'})

In [6]:
# Copies of both frames ordered by the 'date' column
# (ascending order is the sort_values default).
sorted_elon, sorted_tesla = (
    frame.sort_values('date') for frame in (refined_elon, refined_tesla_stock)
)

#### Merged dataframes

In [7]:
# Attach to every tweet the stock prices recorded for the same 'date'.
# The inner join keeps only dates that are present in both frames.
# validate='many_to_one' makes pandas raise a MergeError if a date occurs more
# than once in the stock frame, which would otherwise silently duplicate
# tweet rows.
# NOTE(review): the merge uses the unsorted refined_* frames, not the sorted_*
# frames built in the previous cell — confirm this is intended.
data_merged = pd.merge(
    refined_elon,
    refined_tesla_stock,
    on='date',
    how='inner',
    validate='many_to_one',
)
# Add an average-price column: the midpoint of the High and Low prices.
data_merged['average_stock'] = (data_merged['High'] + data_merged['Low']) / 2

## Data Cleaning and Analysis

### Null values

In [8]:
# Per-column flag: True if the column contains at least one missing value.
data_merged.isna().any()

replies_count     False
retweets_count    False
likes_count       False
date              False
High              False
Low               False
average_stock     False
dtype: bool

The merged data has no null values in any column.

### Minimum value of each column

In [9]:
# Column-wise minimum. Each value is computed independently, so the minima
# may come from different rows.
data_merged.min()

replies_count              0
retweets_count             0
likes_count                2
date              2011-12-01
High                   5.376
Low                    4.528
average_stock          5.114
dtype: object

### Maximum value of each column

In [10]:
# Column-wise maximum. Each value is computed independently, so the maxima
# may come from different rows.
data_merged.max()

replies_count          57987
retweets_count        301391
likes_count          1593425
date              2021-04-16
High              900.400024
Low               871.599976
average_stock         883.75
dtype: object

### Mean (average) value of each column

In [11]:
# Column-wise mean; numeric_only=True restricts the result to numeric columns,
# leaving out the non-numeric 'date' column.
data_merged.mean(numeric_only=True)

replies_count       685.899456
retweets_count     2260.268888
likes_count       21401.644336
High                163.749719
Low                 155.572628
average_stock       159.661173
dtype: float64

## Data Info

In [12]:
# Print the index range, per-column non-null counts and dtypes, and the memory
# usage of the merged frame.
data_merged.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 8643 entries, 0 to 8642
Data columns (total 7 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   replies_count   8643 non-null   int64  
 1   retweets_count  8643 non-null   int64  
 2   likes_count     8643 non-null   int64  
 3   date            8643 non-null   object 
 4   High            8643 non-null   float64
 5   Low             8643 non-null   float64
 6   average_stock   8643 non-null   float64
dtypes: float64(3), int64(3), object(1)
memory usage: 540.2+ KB


## Dependent and Independent variables

In [14]:
# Dependent variable (target): the average stock price, kept as a one-column
# DataFrame. Selecting with [[...]] raises a KeyError if the column is missing
# instead of silently producing an all-NaN column.
y = data_merged[['average_stock']].copy()

# Independent variables (features): the tweet engagement counts.
x = data_merged[['replies_count', 'retweets_count', 'likes_count']].copy()