# 0. Load Env

In [85]:
import json
import pandas as pd
from typing import List, Dict, Tuple, Any, Callable, Union
from functools import reduce
from operator import getitem

In [54]:
# Schema of the reduced tweet. Each entry pairs a path of nested keys inside the
# raw tweet dictionary with the flat name that value gets in the reduced record.
TWEET_DICT_KEY_MAPPING: List[Tuple[Tuple[str, ...], str]] = [
    (("url",), "url"),
    (("content",), "content"),
    (("id",), "tweet_id"),
    (("user", "id"), "user_id"),
    (("user", "username"), "user_name"),
    (("date",), "datetime"),
]

In [55]:
def nested_dictionary_get(d: Dict, key: Tuple):
    """
    Look up a value in a nested container by following a path of keys.

    :param d: the outermost container (typically a dictionary).
    :param key: sequence of keys/indices applied one after another, outermost
        first. An empty sequence returns `d` itself.
    :return: the value reached after the last subscript.
    :raises KeyError, IndexError, TypeError: whatever the subscript operation on
        an intermediate value raises when a step of the path cannot be followed.
    """
    current = d
    for part in key:
        current = current[part]
    return current

def parse_and_reduce_tweet(tweet: Dict) -> Dict[str, Any]:
    """
    Flatten a raw tweet (represented as a dictionary) down to the schema defined
    in `TWEET_DICT_KEY_MAPPING`.

    :param tweet: the raw, possibly nested, tweet dictionary.
    :return: a flat dictionary with one entry per mapping row, keyed by the
        mapping's new name and holding the value found at the nested key path.
    :raises KeyError: if the tweet lacks one of the mapped key paths.
    """
    reduced_tweet = {}
    for key_path, column_name in TWEET_DICT_KEY_MAPPING:
        reduced_tweet[column_name] = nested_dictionary_get(d=tweet, key=key_path)
    return reduced_tweet

In [101]:
def load_and_parse_twarc_file_to_list_of_tweets(path: str, parse_tweet_fxn: Callable) -> List:
    """
    Read a line-delimited JSON file and parse every record in it.

    :param path: location of the file; each non-blank line must hold one JSON document.
    :param parse_tweet_fxn: callable applied to each decoded JSON document; its
        return value becomes one element of the result.
    :return: list of `parse_tweet_fxn` results, in file order.
    :raises json.JSONDecodeError: if a non-blank line is not valid JSON.
    """
    # The file is read as bytes; json.loads accepts bytes and detects the encoding.
    with open(path, 'rb') as f:
        # Blank / whitespace-only lines (e.g. a stray empty line at the end of the
        # file) are skipped instead of being handed to json.loads, which would raise.
        return [parse_tweet_fxn(json.loads(line)) for line in f if line.strip()]

In [102]:
def load_twarc_json_file_to_dataframe(path: str, parse_tweet_fxn: Callable) -> pd.DataFrame:
    """
    Load one line-delimited JSON tweet file into a DataFrame.

    :param path: location of the file to load.
    :param parse_tweet_fxn: callable that turns one decoded tweet into the record
        that becomes a DataFrame row.
    :return: DataFrame with one row per parsed tweet.
    """
    # Forward the caller's parser; previously the argument was ignored and
    # `parse_and_reduce_tweet` was always used regardless of what was passed in.
    tweets_list = load_and_parse_twarc_file_to_list_of_tweets(path=path, parse_tweet_fxn=parse_tweet_fxn)
    tweet_df = pd.DataFrame.from_records(tweets_list)
    return tweet_df

def load_twarc_json_files_to_dataframe(paths: List[str], parse_tweet_fxn: Callable) -> pd.DataFrame:
    """
    Load several line-delimited JSON tweet files into a single DataFrame.

    :param paths: locations of the files to load; may be empty.
    :param parse_tweet_fxn: callable that turns one decoded tweet into the record
        that becomes a DataFrame row.
    :return: the per-file DataFrames stacked in the order of `paths`, with a fresh
        0..n-1 index; an empty DataFrame when `paths` is empty.
    """
    tweet_dfs = [load_twarc_json_file_to_dataframe(path, parse_tweet_fxn) for path in paths]
    if not tweet_dfs:
        # pd.concat raises ValueError on an empty list; return an empty frame instead.
        return pd.DataFrame()
    # Each per-file frame is indexed from 0, so without ignore_index the combined
    # frame would carry duplicate index labels (one "0" per file, and so on).
    tweet_df = pd.concat(tweet_dfs, ignore_index=True)
    return tweet_df

# 1. Proof of Concept

#### Load tweets into a DataFrame

In [103]:
# Directory holding the per-account tweet exports.
# NOTE: this is a machine-specific absolute path; change this one constant to
# run the notebook from a different checkout.
DATA_DIR = "/Users/b.feifke/code/home-gym-vendor-twitter-sentiment-analysis/data"

# Base names of the export files (presumably the vendors' Twitter handles — confirm);
# each one has a file at DATA_DIR/<name>.jsonl.
VENDOR_HANDLES = [
    "Arsenal_Strngth",
    "BeTitanFit",
    "BodySolidFit",
    "BodycraftFit",
    "CAPBarbell84",
    "HOISTFitness",
    "HammerStrength",
    "LifeFitness",
    "PRxPerformance",
    "Powertec",
    "RepFitnessEquip",
    "RogueFitness",
    "Sorinex",
    "TorqueFitness",
    "bellsofsteel",
    "concept2",
    "cybex",
    "elitefts",
    "force_usa",
    "onepeloton",
]

# "Gym Scrape.zip" also sits in DATA_DIR but is deliberately not listed: it is an
# archive, not line-delimited JSON, and json.loads would fail on its contents.
paths = [f"{DATA_DIR}/{handle}.jsonl" for handle in VENDOR_HANDLES]

In [106]:
# Proof of concept: load only the first export file and reduce every tweet to
# the flat schema.
tweet_df = load_twarc_json_file_to_dataframe(
    path=paths[0],
    parse_tweet_fxn=parse_and_reduce_tweet,
)

In [107]:
# Preview the first five tweets, transposed so each field is a row and the long
# text columns stay readable.
tweet_df.head().transpose()

Unnamed: 0,0,1,2,3,4
url,https://twitter.com/Ayoobi77/status/1567340537...,https://twitter.com/Arsenal_Strngth/status/156...,https://twitter.com/Arsenal_Strngth/status/156...,https://twitter.com/Ayoobi77/status/1566312069...,https://twitter.com/Ayoobi77/status/1566311005...
content,@Arsenal_Strngth The true face of sports engin...,Shipping soon: black on black Reloaded Iso Lat...,@Ayoobi77 @Collin_Abel10 @EaglevilleFB @CoachF...,@Collin_Abel10 @Arsenal_Strngth @EaglevilleFB ...,@Arsenal_Strngth Excellent products from Arsen...
tweet_id,1567340537333837824,1567145753360977922,1567144502334308354,1566312069196124166,1566311005038297089
user_id,787689201231888384,2942005261,2942005261,787689201231888384,787689201231888384
user_name,Ayoobi77,Arsenal_Strngth,Arsenal_Strngth,Ayoobi77,Ayoobi77
datetime,2022-09-07T02:34:38+00:00,2022-09-06T13:40:38+00:00,2022-09-06T13:35:40+00:00,2022-09-04T06:27:52+00:00,2022-09-04T06:23:38+00:00


#### Enrich Tweets with sentiment