In [2]:
import seaborn as sns
import numpy as np
import pandas as pd
import snscrape.modules.twitter as sntwitter

sns.set(rc = {'figure.figsize':(15, 7)})
sns.set_style('white')
pd.set_option('display.max_columns', 500)

In [6]:
tweets = pd.read_csv('./data/stock_tweets.csv', parse_dates=['Date'])
stocks = pd.read_csv('./data/stock_yfinance_data.csv')

### Scraping Twitter

In [36]:
twt_list = []

for ticker in list(tweets['Stock Name'].unique()):
    for idx, tweet in enumerate(sntwitter.TwitterSearchScraper(
        f'{ticker} since:2022-09-01 until:2023-01-09').get_items()):
        
        if idx > 50:
            break

        twt_list.append([tweet.date, tweet.rawContent, ticker])

In [60]:
pd.DataFrame(twt_list, columns=['Date', 'Tweet', 'Stock Name']).merge(
    tweets[['Stock Name', 'Company Name']].drop_duplicates(),
    on='Stock Name'
)

Unnamed: 0,Date,Tweet,Stock Name,Company Name
0,2023-01-08 23:59:48+00:00,@cb_doge @elonmusk @Tesla @mayemusk 😅,TSLA,"Tesla, Inc."
1,2023-01-08 23:59:38+00:00,Many r discounting the position of strength Te...,TSLA,"Tesla, Inc."
2,2023-01-08 23:59:28+00:00,Everyone is so bearish the FT is quoting Gali ...,TSLA,"Tesla, Inc."
3,2023-01-08 23:59:12+00:00,@2Sexy4ThisTweet @samsonusmc @Tesla One way is...,TSLA,"Tesla, Inc."
4,2023-01-08 23:59:12+00:00,$TSLA Illustration simple de l impact de la ba...,TSLA,"Tesla, Inc."
...,...,...,...,...
1270,2023-01-08 14:10:03+00:00,The Fear Trade $TSLA $XPEV $NIO $NFLX $NVDA\nh...,XPEV,XPeng Inc.
1271,2023-01-08 13:59:47+00:00,@farzyness @LimitingThe Tesla‘s advantage lies...,XPEV,XPeng Inc.
1272,2023-01-08 13:54:14+00:00,$XPEV Top analyst price target for stocks for ...,XPEV,XPeng Inc.
1273,2023-01-08 13:44:43+00:00,$XPEV Top analyst price target for stocks for ...,XPEV,XPeng Inc.


### Scraping Yahoo Finances

In [63]:
import requests

In [175]:
def getdata(url, user_agent):
    req = requests.get(url, headers = {'User-Agent': user_agent})
    return pd.read_html(req.text)

In [178]:
user_agent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36'

In [188]:
companies = list(tweets['Stock Name'].unique())
df_list = []

for ticker in companies:

    historical_url= f'https://finance.yahoo.com/quote/{ticker}/history?p={ticker}'
    historical_data = getdata(historiscal_url, user_agent)
    data = historical_data[0]
    
    data['Stock Price'] = ticker
    data['Date'] = pd.to_datetime(data['Date'][0:len(data)-1])
    globals()[ticker] = data[0:len(data)-1][data[0:len(data)-1]['Date'] > stocks['Date'].max()]
    df_list.append(globals()[ticker])

In [200]:
df = pd.concat(df_list)
df

Unnamed: 0,Date,Open,High,Low,Close*,Adj Close**,Volume,Stock Price
0,2023-02-01,10.96,11.30,10.60,11.13,11.13,20403200,TSLA
1,2023-01-31,10.20,10.82,10.08,10.64,10.64,21769400,TSLA
2,2023-01-30,10.32,10.42,9.87,10.02,10.02,19064600,TSLA
3,2023-01-27,10.16,11.31,10.06,10.69,10.69,27900800,TSLA
4,2023-01-26,10.45,10.46,9.97,10.24,10.24,16775400,TSLA
...,...,...,...,...,...,...,...,...
80,2022-10-06,10.99,11.19,10.40,10.56,10.56,22461900,XPEV
81,2022-10-05,11.53,11.55,10.67,10.99,10.99,38984800,XPEV
82,2022-10-04,12.15,12.38,11.81,11.98,11.98,22802700,XPEV
83,2022-10-03,11.87,11.93,11.42,11.73,11.73,22300300,XPEV
