### Importing necessary libraries-

In [1]:
import numpy as np
import pandas as pd
from datetime import datetime
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor

### Reading the dataset-

In [2]:
df = pd.read_csv("./Data/BitcoinTweetWithSentiment.csv")
df.head()

Unnamed: 0.1,Unnamed: 0,mdy,date,content,likes,retweets,Sentiment
0,0,06-03-2023,2023-03-06 23:58:49+00:00,girl explain bitcoin,501,140,1
1,1,06-03-2023,2023-03-06 23:58:07+00:00,roughly hour leave pulsechain testnet fully sy...,32,1,1
2,2,06-03-2023,2023-03-06 23:57:47+00:00,come join conversation bitcoin fix hood americ...,24,1,1
3,3,06-03-2023,2023-03-06 23:54:57+00:00,get haircut pay bitcoin feel good always offer...,14,0,1
4,4,06-03-2023,2023-03-06 23:54:52+00:00,bitcoin usd weekly wrong bearish rising wedge ...,721,65,0


In [3]:
df.shape

(354893, 7)

### Dropping unnecessary columns from the dataset-

In [4]:
df.drop(df.columns[0], axis=1, inplace=True)

In [5]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 354893 entries, 0 to 354892
Data columns (total 6 columns):
 #   Column     Non-Null Count   Dtype 
---  ------     --------------   ----- 
 0   mdy        354893 non-null  object
 1   date       354893 non-null  object
 2   content    354893 non-null  object
 3   likes      354893 non-null  int64 
 4   retweets   354893 non-null  int64 
 5   Sentiment  354893 non-null  int64 
dtypes: int64(3), object(3)
memory usage: 16.2+ MB


### Converting the 'mdy' and 'date' columns to datetime format-

In [6]:
df['date'] = df['date'].apply(lambda x: x.split('+')[0])
df[['mdy', 'date']] = df[['mdy', 'date']].apply(pd.to_datetime, format='mixed')

In [7]:
df.info()
df.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 354893 entries, 0 to 354892
Data columns (total 6 columns):
 #   Column     Non-Null Count   Dtype         
---  ------     --------------   -----         
 0   mdy        354893 non-null  datetime64[ns]
 1   date       354893 non-null  datetime64[ns]
 2   content    354893 non-null  object        
 3   likes      354893 non-null  int64         
 4   retweets   354893 non-null  int64         
 5   Sentiment  354893 non-null  int64         
dtypes: datetime64[ns](2), int64(3), object(1)
memory usage: 16.2+ MB


Unnamed: 0,mdy,date,content,likes,retweets,Sentiment
0,2023-06-03,2023-03-06 23:58:49,girl explain bitcoin,501,140,1
1,2023-06-03,2023-03-06 23:58:07,roughly hour leave pulsechain testnet fully sy...,32,1,1
2,2023-06-03,2023-03-06 23:57:47,come join conversation bitcoin fix hood americ...,24,1,1
3,2023-06-03,2023-03-06 23:54:57,get haircut pay bitcoin feel good always offer...,14,0,1
4,2023-06-03,2023-03-06 23:54:52,bitcoin usd weekly wrong bearish rising wedge ...,721,65,0


### Extracting the Year, Month, Day, Hour, Minute and Second (features) from the dataset-

In [8]:
date = df['date']
dt = date.dt
y, m, d, h, m, s = dt.year, dt.month, dt.day, dt. hour, dt.minute, dt.second

### Extracting the Likes, Retweets (features) and Sentiment (Target Variable) from the dataset-

In [9]:
likes, retweets = df['likes'], df['retweets']
sentiment = df['Sentiment']

### Grouping the Input Features and Target Variable respectively-

In [10]:
X = np.array([y, m, d, h, m, s, likes, retweets])
Y = np.array(sentiment)

In [11]:
X.T.shape, Y.shape

((354893, 8), (354893,))

### Making a list of Machine Learning model constructors-

In [12]:
model_list = [LinearRegression(), RandomForestRegressor(n_estimators=10)]

### Training a selected Machine Learning model over the data-

In [13]:
model = model_list[1]
model.fit(X.T, Y)

### Taking user inputs and storing them into an array-

In [14]:
inp_year, inp_month, inp_day, inp_hr, inp_min, inp_sec = [int(input(f'Please Enter the {i}: ')) for i in ('Year', 'Month', 'Day', 
                                                                                                          'Hour', 'Minute', 'Second')]

inp_likes, inp_retweets = [int(input(f'Please Enter the Number of {i}: ')) for i in ('Likes', 'Retweets')]

X_pred = np.array([inp_year, inp_month, inp_day, inp_hr, inp_min, inp_sec, inp_likes, inp_retweets])

Please Enter the Year:  2003
Please Enter the Month:  7
Please Enter the Day:  19
Please Enter the Hour:  14
Please Enter the Minute:  6
Please Enter the Second:  32
Please Enter the Number of Likes:  198
Please Enter the Number of Retweets:  423


### Making predictions for the input features-

In [15]:
Y_pred = model.predict([X_pred])

### Manually decoding the pre-encoded prediction outputs-

In [16]:
Y_pred_decoded = '+ve(up)' if Y_pred == 1 else '-ve(down)' + ' market sentiment'

In [17]:
Y_pred_decoded

'+ve(up)'

### Printing the Output results in a comprehendable format-

In [18]:
print(f'Bitcoin(BTC) has a {Y_pred_decoded}.')

Bitcoin(BTC) has a +ve(up).
