# Hassan, et al. (2019)

> Firm-level risk and sentiment derived from quarterly earnings conference calls

This module downloads and processes data developed by:

- Tarek A. Hassan, Stephan Hollander, Laurence van Lent, Ahmed Tahoun, 2019, “Firm-Level Political Risk: Measurement and Effects,” Quarterly Journal of Economics, 134 (4), pp. 2135-2202. <https://doi.org/10.1093/qje/qjz021>.

The dataset also contains data developed by the papers below, but we will not use it in this module:

- Tarek A. Hassan, Stephan Hollander, Laurence van Lent, Ahmed Tahoun, "The Global Impact of Brexit Uncertainty," 2020, NBER Working Paper 26609

- Tarek A. Hassan, Stephan Hollander, Laurence van Lent, Markus Schwedeler, Ahmed Tahoun, "Firm-level Exposure to Epidemic Diseases: Covid-19, SARS, and H1N1," 2020, mimeo London Business School. Available at SSRN: <https://ssrn.com/abstract=3566530>

See the authors' dedicated website for more information on this dataset: <https://www.firmlevelrisk.com/>

In [None]:
#| default_exp papers.hassan_etal_2019

In [None]:
#| export
from __future__ import annotations
import pandas as pd

import pandasmore as pdm
from finsets import wrds, RESOURCES, dataloader

In [None]:
#| exports
# Citation string for the authors who developed the dataset
PROVIDER = 'Tarek A. Hassan, Stephan Hollander, Laurence van Lent, Ahmed Tahoun, 2019'
# Direct link to the raw firm-quarter file; used as the default `url` of `download`
URL = 'https://www.dropbox.com/s/96xo9f1twlu3525/firmquarter_2022q1.csv?raw=1'
# Authors' website with more information on the dataset
HOST_WEBSITE = 'https://www.firmlevelrisk.com/'
# Frequency of the data (quarterly)
FREQ = 'Q'
# Presumably the first and last calendar years covered by the file at `URL` -- TODO confirm
MIN_YEAR = 2002
MAX_YEAR = 2022
# Names of the entity identifier and time variable, before and after `clean` is applied
ENTITY_ID_IN_RAW_DSET = 'gvkey'
ENTITY_ID_IN_CLEAN_DSET = 'permno'
TIME_VAR_IN_RAW_DSET = 'date'
TIME_VAR_IN_CLEAN_DSET = 'Qdate'
# NOTE(review): the file name starts with 'compa' -- confirm this is the intended labels file for this dataset
LABELS_FILE = RESOURCES/'compa_variable_descriptions.csv'

In [None]:
#| export
def variables():
    """Names of key variables in the dataset, returned as a new list on every call.
    `company_name`,`hqcountrycode`,`isin`,`cusip`,`ticker` are also available but are omitted here to speed things up and save memory."""

    # Firm identifier, quarter, and earnings-call date columns
    id_cols = ['gvkey', 'date', 'date_earningscall']
    # Overall risk and sentiment measures
    risk_cols = ['PRisk', 'NPRisk', 'Risk']
    sentiment_cols = ['PSentiment', 'NPSentiment', 'Sentiment']
    # Topic-specific columns all share the `PRiskT_` prefix
    topics = ('economic', 'environment', 'trade', 'institutions',
              'health', 'security', 'tax', 'technology')
    topic_cols = [f'PRiskT_{topic}' for topic in topics]

    return id_cols + risk_cols + sentiment_cols + topic_cols

In [None]:
variables()

['gvkey',
 'date',
 'date_earningscall',
 'PRisk',
 'NPRisk',
 'Risk',
 'PSentiment',
 'NPSentiment',
 'Sentiment',
 'PRiskT_economic',
 'PRiskT_environment',
 'PRiskT_trade',
 'PRiskT_institutions',
 'PRiskT_health',
 'PRiskT_security',
 'PRiskT_tax',
 'PRiskT_technology']

In [None]:
#| export
def download(url: str=URL, # Location of the raw data file
            vars: list|None=None, # Which variables to download. If None, the list returned by `variables()` is used
            obs_limit: int|None=None, # How many rows to download. If None, all rows are downloaded
            delimiter: str='\t' # Field separator used in the raw file
            ) -> pd.DataFrame:
    """Download raw data from `url`, keeping only the columns listed in `vars`."""

    # Resolve the default per call rather than in the signature, so that no single
    # list object is shared (and potentially mutated) across calls.
    if vars is None: vars = variables()

    return dataloader.get_text_file_from_url(url, nrows=obs_limit, delimiter=delimiter, usecols=vars)

In [None]:
# Quick preview: limit the download to a few rows and a subset of the columns
download(obs_limit=3, vars=['gvkey','date', 'PRisk', 'Risk','PSentiment','Sentiment'])

Unnamed: 0,gvkey,date,PRisk,Risk,PSentiment,Sentiment
0,1004,2002q1,359.55072,168.98235,997.86415,469.39542
1,1004,2002q2,0.0,0.0,1594.7321,544.82417


In [None]:
#| eval: false
# Full download: all rows, default set of variables
raw = download()

In [None]:
#| eval: false
raw.info(memory_usage='deep')

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 354518 entries, 0 to 354517
Data columns (total 17 columns):
 #   Column               Non-Null Count   Dtype  
---  ------               --------------   -----  
 0   gvkey                354518 non-null  int64  
 1   date                 354518 non-null  object 
 2   PRisk                353437 non-null  float64
 3   NPRisk               353437 non-null  float64
 4   Risk                 353437 non-null  float64
 5   PSentiment           353437 non-null  float64
 6   NPSentiment          353437 non-null  float64
 7   Sentiment            353437 non-null  float64
 8   PRiskT_economic      353437 non-null  float64
 9   PRiskT_environment   353437 non-null  float64
 10  PRiskT_trade         353437 non-null  float64
 11  PRiskT_institutions  353437 non-null  float64
 12  PRiskT_health        353437 non-null  float64
 13  PRiskT_security      353437 non-null  float64
 14  PRiskT_tax           353437 non-null  float64
 15  PRiskT_technology

In [None]:
#| export
def clean(df: pd.DataFrame|None=None, # If None, will download using `download`
          gvkey_permno_link: bool|pd.DataFrame=True, # Whether to download permno or not. If DataFrame, must contain `permno`, `gvkey`, and `Qdate`
          how: str='inner' # How to merge permno into `df` if `gvkey_permno_link` is not False
          ) -> pd.DataFrame:
    """Converts `gvkey` to string and applies `pandasmore.setup_panel`. Adds `permno` if `gvkey_permno_link` is not False.

    The raw `date` column (e.g. '2002q1') is rewritten as the last calendar day of the quarter
    ('YYYY-MM-DD') before the panel is set up on `gvkey`. If `gvkey_permno_link` is False, that
    `gvkey`-indexed panel is returned. Otherwise the link table (downloaded from WRDS when
    `gvkey_permno_link` is True, or the supplied DataFrame) is merged on `gvkey` and `Qdate`
    and the result is set up as a panel on `permno`.
    """

    df = download() if df is None else df.copy()

    df['gvkey'] = df['gvkey'].astype('string').str.zfill(6)
    df['date'] = df['date'].astype('string')

    # Format date variable so it can be converted into datetime (as the last day of the quarter)
    year = df['date'].str.slice(0, 4)
    quarter = df['date'].str.slice(5, 6).astype('int')

    last_month = (quarter * 3).astype('string').str.zfill(2)
    last_day = last_month.map({'03': '31', '06': '30', '09': '30', '12': '31'})

    df['date'] = year + '-' + last_month + '-' + last_day

    df = pdm.setup_panel(df, panel_ids='gvkey',
                        time_var='date', freq='Q',
                        panel_ids_toint=False,
                        drop_index_duplicates=True, duplicates_which_keep='last')

    # A DataFrame has no unambiguous truth value (`not df` raises ValueError),
    # so check for a user-supplied link table before testing truthiness.
    link_supplied = isinstance(gvkey_permno_link, pd.DataFrame)
    if not link_supplied and not gvkey_permno_link:
        return df

    if gvkey_permno_link is True:
        gvkey_permno_link = wrds.linking.gvkey_permno_q()

    df = df.reset_index().merge(gvkey_permno_link, how=how, on=['gvkey','Qdate'])
    return pdm.setup_panel(df, panel_ids='permno', dates_processed=True, freq='Q',
                           drop_index_duplicates=True, duplicates_which_keep='last')

In [None]:
#| eval: false
# Clean the raw data; with the default arguments `clean` also merges in `permno`
df = clean(raw)

Loading library list...
Done


In [None]:
#| eval: false
df

Unnamed: 0_level_0,Unnamed: 1_level_0,date,dtdate,gvkey,PRisk,NPRisk,Risk,PSentiment,NPSentiment,Sentiment,PRiskT_economic,PRiskT_environment,PRiskT_trade,PRiskT_institutions,PRiskT_health,PRiskT_security,PRiskT_tax,PRiskT_technology,date_earningscall
permno,Qdate,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
10001,2014Q2,2014-06-30,2014-06-30,012994,94.384437,418.03548,85.836910,3737.937800,10709.5570,1688.125900,1472.4680,1427.3330,153.83140,834.1546,1036.2480,1329.0460,619.58790,953.8204,16-May-2014
10001,2014Q3,2014-09-30,2014-09-30,012994,52.685563,283.10302,22.547914,271.234090,-23341.5090,45.095829,461.1490,550.6544,37.41568,267.4431,263.1918,187.0833,26.11517,0.0000,15-Aug-2014
10001,2014Q4,2014-12-31,2014-12-31,012994,84.720984,505.82624,72.516316,1985.533700,-39785.7910,870.195790,741.5504,885.4795,60.16626,430.0617,423.2254,300.8392,41.99448,0.0000,14-Nov-2014
10001,2015Q1,2015-03-31,2015-03-31,012994,160.354860,679.80435,59.265113,1882.599000,14314.7260,1086.527100,13185.4800,2472.8130,522.97300,1988.2930,2451.2430,1962.2720,3618.88900,514.4636,13-Mar-2015
10001,2015Q2,2015-06-30,2015-06-30,012994,102.572640,326.61838,87.796313,94.565626,14089.6600,746.268660,897.8033,1072.0600,72.84396,520.6804,512.4037,364.2294,50.84318,0.0000,12-May-2015
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
93436,2021Q1,2021-03-31,2021-03-31,184996,70.162735,2423.95510,32.485111,1048.529400,8259.9438,1136.978900,1386.3150,1225.8070,174.75850,431.1377,817.0463,665.5459,224.10700,566.9141,27-Jan-2021
93436,2021Q2,2021-06-30,2021-06-30,184996,114.776580,1048.82540,79.051383,275.735390,27480.0860,564.652740,2340.8990,5368.5470,2355.19500,1962.5090,2233.1690,2623.3820,2770.03300,1947.7700,26-Apr-2021
93436,2021Q3,2021-09-30,2021-09-30,184996,81.427234,444.38115,94.573827,2018.630100,14960.6620,461.047410,3585.3900,3239.3540,1326.35600,1787.7950,2409.2520,3233.9570,4120.24400,2622.4820,26-Jul-2021
93436,2021Q4,2021-12-31,2021-12-31,184996,144.372650,1305.04820,152.936380,1277.480700,3896.5935,723.898860,6081.8830,4283.5880,2538.47000,2494.1530,6012.9180,5502.4500,6379.86200,3413.7940,20-Oct-2021


In [None]:
#| hide
import nbdev; nbdev.nbdev_export()