In [1]:
from twitter_scraper.clean.users import USER_DTYPE
from twitter_scraper.clean.tweets import TWEET_DTYPE
from twitter_scraper import settings
from twitter_scraper import utils

import matplotlib.pyplot as plt
import datetime as dt
import pandas as pd

import matplotlib.style
import matplotlib as mpl

mpl.style.use('seaborn-v0_8-darkgrid')
default_colors = plt.rcParams['axes.prop_cycle'].by_key()['color']


ANALYSIS_MIN_DATE = dt.datetime.fromisoformat('2022-11-01T00:00:00+00:00')
ANALYZE_COLUMNS = [
    'name',
    'screen_name',
    'location',
    'is_croatian',
    'description',
    
    'followers_count',
    'friends_count',
    'favourites_count',
    'favorite_cnt', # sum by tweet favorite_count
    'statuses_count',
    
    'total_tweets_cnt',
    'original_tweets_cnt',
    'original_tweets_pct',
    'retweet_tweets_cnt',
    'retweet_tweets_pct',
    'reply_tweets_cnt',
    'reply_tweets_pct',
    'quote_tweets_cnt',
    'quote_tweets_pct',
    
    'protected',
    'verified',
    'created_at',
    'clean_location'
]

clean_users_dfs = utils.read_directory_files(
    directory=settings.CLEAN_USERS_DIR, 
    read_fn=pd.read_csv, 
    dtype=USER_DTYPE,
    parse_dates=['created_at']
)
clean_tweets_dfs = utils.read_directory_files(
    directory=settings.CLEAN_TWEETS_DIR, 
    read_fn=pd.read_csv, 
    dtype=TWEET_DTYPE,
    parse_dates=['created_at', 'retweet_created_at']
)

users_df = pd.concat(clean_users_dfs).set_index('user_id')
tweets_df = pd.concat(clean_tweets_dfs)
tweets_df = tweets_df.loc[tweets_df['created_at'] > ANALYSIS_MIN_DATE].copy()
users_df = users_df[ANALYZE_COLUMNS]
_users_df = users_df.loc[tweets_df.user_id.unique()]

In [3]:
tweets_df[tweets_df['id'] == 1592495575660130305]

Unnamed: 0,id,user_id,user_id_str,full_text,created_at,hashtags,user_mentions,retweet_count,retweet_from_user_id,retweet_from_screen_name,...,year,quarter,quarter_name,month,month_name,week,week_name,day,day_name,folder_name
7491093,1592495575660130305,1243648845021577229,1243648845021577229,"'Crveni val' nije pomeo Ameriku, ali ni demokr...",2022-11-15 12:31:47+00:00,[],['tportal'],0,,,...,2022,4,2022Q4,11,2022-11,46,2022-46,15,2022-11-15,2022-12-04
140587,1592495575660130305,1243648845021577229,1243648845021577229,"'Crveni val' nije pomeo Ameriku, ali ni demokr...",2022-11-15 12:31:47+00:00,[],['tportal'],0,,,...,2022,4,2022Q4,11,2022-11,46,2022-46,15,2022-11-15,2022-12-04
140595,1592495575660130305,1243648845021577229,1243648845021577229,"'Crveni val' nije pomeo Ameriku, ali ni demokr...",2022-11-15 12:31:47+00:00,[],['tportal'],0,,,...,2022,4,2022Q4,11,2022-11,46,2022-46,15,2022-11-15,2022-12-04


In [8]:
_users_df

Unnamed: 0_level_0,name,screen_name,location,is_croatian,description,followers_count,friends_count,favourites_count,favorite_cnt,statuses_count,...,retweet_tweets_cnt,retweet_tweets_pct,reply_tweets_cnt,reply_tweets_pct,quote_tweets_cnt,quote_tweets_pct,protected,verified,created_at,clean_location
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
3705765382,CrvenikrizDubrovnik,GDCK_Dubrovnik,"Republic of Croatia, Dubrovnik",True,"Crveni križ provodi unapređenje zdravlja, soci...",145,388,665,198,363,...,222,0.608219,3,0.008219,67,0.183562,False,False,2015-09-19 09:30:37+00:00,Hrvatska
1282607267628699648,Regional Development Agency SIMORA,SIMORA_Croatia,"Sisak, Croatia",True,Regional Development Agency of Sisak-Moslavina...,56,47,1484,544,455,...,130,0.264228,29,0.058943,14,0.028455,False,False,2020-07-13 09:26:28+00:00,Hrvatska
1055166606974828545,Fran Kovačić,Fran56003149,Republic of Croatia,True,"19, 🇭🇷",11,178,2,4,19,...,0,0.000000,18,0.857143,0,0.000000,False,False,2018-10-24 18:38:17+00:00,Hrvatska
120903931,Ender The Silent,Misek_Chipsy,Croatia,True,"Every time i get bored, I just stop being bore...",48,470,66,246,593,...,9,0.021327,274,0.649289,7,0.016588,False,False,2010-03-07 23:11:32+00:00,Hrvatska
3089524283,Vlatka Butkovic 🎶,Butkovicedu,"Zagreb, Croatia",True,@CEBCroatia Program Director/ @CEESAorg Global...,970,1907,4179,465,5092,...,117,0.145704,45,0.056040,160,0.199253,False,False,2015-03-12 18:15:18+00:00,Hrvatska
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
762559004547620864,Nenad Tkalčić,NTkalcic,Zagreb,True,,66,511,2046,2110,1693,...,72,0.042604,1369,0.810059,73,0.043195,False,False,2016-08-08 08:00:09+00:00,Zagreb
405712013,Zlatan Soldo,ZlatanSoldo,"Zagreb, Croatia",True,"učitelj, nastavnik informatike i računalstva u...",29,54,996,28,44,...,2,0.043478,28,0.608696,0,0.000000,False,False,2011-11-05 17:52:25+00:00,Hrvatska
4007812109,branko krivokuca,BKrivokuca,Croatia,True,,845,4748,5189,3,2840,...,475,0.975359,6,0.012320,15,0.030801,False,False,2015-10-21 07:23:01+00:00,Hrvatska
9841492,Igor Vrdoljak,ivrdoljak,Zagreb,True,"Co-founder of @netgentweets, father, husband. ...",564,568,505,72,2379,...,167,0.830846,11,0.054726,13,0.064677,False,False,2007-11-01 01:28:25+00:00,Zagreb
