# NYT Data Notebook

This notebook builds the summary table for the NYT data (date range and number of articles per company) and exports it as an image.

## Modules

In [1]:
import pandas as pd
import dataframe_image as dfi
from datetime import datetime

## Data Cleaning (NYT API)

### Initial Data Summary

In [2]:
def parse_datetime(dt_str: str):
    """
    Convert a raw ``MM/DD/YYYY`` date string into a Python date.

    dt_str: Raw date string, e.g. "03/30/2023"
    returns: The parsed value as a ``datetime.date`` (no time component)
    raises: ValueError if dt_str does not match the ``%m/%d/%Y`` format
    """
    date_format = '%m/%d/%Y'
    parsed = datetime.strptime(dt_str, date_format)
    # Only the calendar date is needed downstream, so drop the time part
    return parsed.date()

In [3]:
# Display labels for the companies: raw (lower-case) label -> nicely formatted label
company_mapping = {
    "adani group": "Adani Group",
    "ftx": "FTX",
    "microsoft": "Microsoft",
    "google": "Google",
    "air canada": "Air Canada",
    "amazon": "Amazon",
    # NOTE(review): key and label both end with a space -- presumably this
    # mirrors the raw label in the data; confirm before normalising.
    "apple ": "Apple ",
    "samsung": "Samsung",
    "meta": "Meta",
    "intel": "Intel",
    "bose": "Bose",
    "fia": "FIA",
}
    

In [4]:
# Reading in the data. The file is read with header=None, so the column
# names are assigned explicitly afterwards.
nyt = pd.read_csv("data/Api_data.csv", header=None)
# Plain assignment: the previous chained form (`nyt.columns = columns = [...]`)
# also created an unused global named `columns`.
nyt.columns = ["company", "source", "date", "text"]

In [5]:
# Distinct company labels found in the data
companies = nyt["company"].unique()

# Parse the raw date strings in place. Only string values are parsed, so
# re-running this cell after the column has already been converted leaves the
# dates untouched instead of failing inside strptime with a TypeError.
nyt['date'] = nyt['date'].apply(
    lambda raw_date: parse_datetime(raw_date) if isinstance(raw_date, str) else raw_date
)

# Build one summary record per company
data_all = []
for company in companies:

    # All rows belonging to the current company
    df = nyt[nyt["company"] == company]

    # Creating summary data
    data = {
        # Fall back to the raw label when no display label is defined,
        # rather than aborting the whole summary with a KeyError
        'Company': company_mapping.get(company, company),
        'Start Date': df['date'].min(),
        'End Date': df['date'].max(),
        'Number of Articles': len(df),
    }

    data_all.append(data)

In [6]:
# Turn the list of per-company records into a DataFrame (one row per company)
data_summary = pd.DataFrame(data=data_all)

In [7]:
# Use the company label as the row index
data_summary = data_summary.set_index('Company')

In [8]:
# Keep only the companies that should appear in the final table
companies_to_keep = ["Air Canada","Microsoft",  "Meta", "FIA", "Adani Group", "FTX"]
keep_mask = data_summary.index.isin(companies_to_keep)
data_summary = data_summary[keep_mask]

In [9]:
# Sum the per-company counts to report the overall number of articles
total_articles = data_summary['Number of Articles'].sum()
print("Total Number of Articles: ", total_articles)

Total Number of Articles:  3834


In [10]:
# Setting table styles and writing to disk.
# The styled table gets its own name so that `data_summary` stays a DataFrame:
# rebinding it to a Styler made this cell fail when run a second time
# (a Styler has no `.style` attribute).
summary_styled = (
    data_summary.style
    # Centre the header cells ...
    .set_table_styles([dict(selector='th', props=[('text-align', 'center')])])
    # ... and the data cells
    .set_properties(**{'text-align': 'center'})
)
dfi.export(summary_styled, 'results/nyt_data.png')

[0330/083843.971916:INFO:headless_shell.cc(107)] 35284 bytes written to file /tmp/tmpkxgb5ckr/temp.png
