# Get the messages posted by each sampled user within a given time period, from a Zocial Eye Excel export

In [None]:
from typing import List

import datetime
import pandas as pd

# ----------------------------------------------------------------------
# Inputs: edit these to point at the workbooks, sheets and columns to use
# ----------------------------------------------------------------------

# Workbook holding the sampled users, the sheet to read, and the column
# that contains each user's screen name.
accounts_file_name = "edang-firstvoters-info-samples.xlsx"
accounts_sheet_name = "samples"
screen_name_col = "screen_name"

# Workbook holding the messages, the sheet to read, and the columns for
# the posting account, the message text and the post time.
msg_file_name = "edang-2019-01-21-2019-05-25.xlsx"
msg_sheet_name = "twitter"
msg_account_col = "Account"
msg_msg_col = "Message"
msg_date_col = "Post time"

# ----------------------------------------------------------------------
# Date settings
# ----------------------------------------------------------------------

# Timestamp layout of the Zocial Eye Excel export, e.g. 2019-02-15 15:50:56
datetime_format = "%Y-%m-%d %H:%M:%S"

# Election day.
election_date = datetime.datetime(year=2019, month=3, day=24)

# First and last day of the period of interest.
start_date = datetime.datetime(year=2019, month=3, day=23)
end_date = datetime.datetime(year=2019, month=3, day=25)

## Read messages

In [None]:
# Load only the account, message-text and post-time columns from the
# messages sheet.
msg_df = pd.read_excel(
    msg_file_name,
    sheet_name=msg_sheet_name,
    usecols=[msg_account_col, msg_msg_col, msg_date_col],
)

# Keep the calendar date only: parse the post time, then reset the
# time-of-day component to midnight.
post_times = pd.to_datetime(msg_df[msg_date_col])
msg_df[msg_date_col] = post_times.dt.normalize()

# Preview the first rows.
msg_df.head()

## Read sampled user list

In [None]:
# Load the sampled accounts sheet, reading only the screen-name column.
screen_names_df = pd.read_excel(accounts_file_name, sheet_name=accounts_sheet_name, usecols=[screen_name_col])

# Select the column through the configured name (the same one passed to
# `usecols`), so changing `screen_name_col` cannot desynchronise the load
# from the lookup. Duplicates are dropped, keeping first-seen order.
screen_names = screen_names_df[screen_name_col].drop_duplicates().tolist()

# Show the first and last name plus the total as a quick sanity check.
print(f"Screen names: {screen_names[:1]} .. {screen_names[-1:]}")
print(f"Total: {len(screen_names):,}")

## Get messages within the time range

In [None]:
# Keep only messages posted from start_date through end_date, both ends
# inclusive. The date column is addressed through `msg_date_col` so this
# filter follows the configured column name instead of a hard-coded one.
# Post times were reset to midnight when the messages were loaded, so a
# message posted at any time on end_date still falls inside the range.
msg_df = msg_df[msg_df[msg_date_col].between(start_date, end_date)]

## Keep only messages from accounts in the sample list

In [None]:
# Keep only messages whose account is one of the sampled screen names,
# ordered by account. The account column is addressed through
# `msg_account_col` so the filter and the sort both follow the configured
# column name instead of a hard-coded one.
msg_df = msg_df[msg_df[msg_account_col].isin(screen_names)].sort_values(msg_account_col)

# Display the resulting frame.
msg_df