# Environment Setup

In [None]:
# Google Drive connection
# Tip: to access a file that was shared with you, add a shortcut to it in My Drive

from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Libraries import

import pandas as pd
from tabulate import tabulate
from matplotlib import pyplot as plt
import seaborn as sns

# Connection test

In [None]:
# Verify that the data folder is readable by listing its contents

!ls '/content/drive/MyDrive/andresrokp/Thesis Juandavid/'

In [None]:
# Hello world: smoke-test that a Stata file on Drive can be read

file_path = '/content/drive/MyDrive/andresrokp/Thesis Juandavid/Companies.dta'
df = pd.read_stata(file_path)
print('\n---\ninfo\n---')
df.info()  # Show column names, dtypes and non-null counts
print('\n---\ndescribe\n---')
print(df.describe())  # Show very basic statistics
print('\n---\nhead\n---')
df.head()  # Show first rows


# Dataset loading

In [None]:
# Folder that holds every .dta input; change it here if the Drive layout moves.
DATA_DIR = '/content/drive/MyDrive/andresrokp/Thesis Juandavid'

# File paths
path_companies_file = f'{DATA_DIR}/Companies.dta'
path_ipo_m_a_file = f'{DATA_DIR}/IPO_M&A.dta'
path_investments_file = f'{DATA_DIR}/Investments.dta'
path_investors_file = f'{DATA_DIR}/Investors.dta'
path_vico = f'{DATA_DIR}/VICO5.0_Investors_Reputation_withOldIDs.dta'


def _read_stata_with_info(path, reading_banner, info_banner):
    """Read one Stata file, print its ``DataFrame.info()`` summary and return it.

    Args:
        path: Location of the ``.dta`` file.
        reading_banner: Text printed before the file is read.
        info_banner: Text printed right before the ``info()`` summary.

    Returns:
        The DataFrame produced by ``pd.read_stata(path)``.
    """
    print(reading_banner)
    frame = pd.read_stata(path)
    print(info_banner)
    # info() writes its report to stdout itself, so it is called bare (not inside print).
    frame.info()
    return frame


# Read Companies data
df_companies = _read_stata_with_info(
    path_companies_file, "Reading Companies data...", "\nCompanies Data Info:")

# Read IPO M&A data
df_ipo_ma = _read_stata_with_info(
    path_ipo_m_a_file, "\nReading IPO M&A data...", "\nIPO M&A Data Info:")

# Read Investments data
df_investments = _read_stata_with_info(
    path_investments_file, "\nReading Investments data...", "\nInvestments Data Info:")

# Read Investors data
df_investors = _read_stata_with_info(
    path_investors_file, "\nReading Investors data...", "\nInvestors Data Info:")

# Read VICO5 data
df_vico5_BNT = _read_stata_with_info(
    path_vico, "\nReading vico5_BNT data...", "\nVICO5 Data Info:")
print("Number of Rows in vico5_BNT Data:", len(df_vico5_BNT))

In [None]:
'''
1st model: Wahal, (2004). Sorensen, (2007)

Used tables:
'df_investors',
'df_vico5_BNT'
'''

# NOTE(review): this cell repeats the join and cleaning that the "1st Model" section
# performs again further down, and it assigns the same variable names
# (merged_investments_investors, df_Investments_merge_clean). Consider keeping only one.

# Share of the unique investors shown in the final "top" listing.
TOP_INVESTOR_SHARE = 0.25

# Left join of the tables: keep every df_vico5_BNT row and attach the matching
# df_investors columns through InvestorID.
merged_investments_investors = pd.merge(df_vico5_BNT, df_investors, on='InvestorID', how='left')

print('-\n--\n-')

# Cleaning: keep only VCs, i.e. drop InvestorType 'BA' and 'Other' and every row
# whose InvestorID contains the text 'missing'.
df_Investments_merge_clean = merged_investments_investors[
    (merged_investments_investors['InvestorType'] != 'BA')
    & (merged_investments_investors['InvestorType'] != 'Other')
    # na=False: a null InvestorID counts as "does not contain 'missing'" instead of
    # putting NaN into the mask, which `~` cannot negate.
    & ~merged_investments_investors['InvestorID'].str.contains('missing', na=False)
]
# DataFrame.info() prints its report itself and returns None, so wrapping it in
# print() would emit a stray "None"; print the label, then call info() on its own.
print('df_Investments_merge_clean.info()')
df_Investments_merge_clean.info()
print( 'len (df_Investments_merge_clean)', len(df_Investments_merge_clean) )


col_InvestorID_clean = df_Investments_merge_clean['InvestorID']

numberOf_Unique_InvestorsClean = len(col_InvestorID_clean.unique())
print( 'numberOf_Unique_InvestorsClean', numberOf_Unique_InvestorsClean )

# Number of rows per InvestorID, most frequent first (value_counts sorts descending).
uniqueInvestorsAppereanceCount = col_InvestorID_clean.value_counts()
print('uniqueInvestorsAppereanceCount\n', uniqueInvestorsAppereanceCount)
print( 'len( uniqueInvestorsAppereanceCount )', len( uniqueInvestorsAppereanceCount ) )


print("\n\nInvestorName_Final conteo de apariciones TOP 25%")
# Last expression of the cell: shown as the cell output. Reuses the counts computed above.
uniqueInvestorsAppereanceCount.head(round(numberOf_Unique_InvestorsClean * TOP_INVESTOR_SHARE))

# Helper Functions

In [None]:
def akap_pretty_print_df(df, rows):
    """Print a head/tail preview of ``df`` as a 'pretty' tabulate table.

    When ``df`` has more than ``2 * rows`` rows, the first and last ``rows`` rows
    are printed with a single row of '...' between them. Otherwise the whole
    frame is printed once: head and tail would overlap and the same rows would
    be printed twice around a misleading ellipsis row.

    Args:
        df: DataFrame to preview.
        rows: Number of rows to take from the top and from the bottom.

    Returns:
        The string 'printed!' (the table itself goes to stdout).
    """
    if len(df) <= 2 * rows:
        preview = df
    else:
        # The separator row is labelled '...' so it cannot be mistaken for data row 0.
        ellipsis_row = pd.DataFrame(
            [['...'] * len(df.columns)], columns=df.columns, index=['...']
        )
        preview = pd.concat([df.head(rows), ellipsis_row, df.tail(rows)])
    print(tabulate(preview, headers='keys', tablefmt='pretty'))
    return 'printed!'

def akap_txt_file_writer(df, file_name,
                         output_dir='/content/drive/MyDrive/andresrokp/Thesis Juandavid'):
    """Write the whole of ``df`` as a 'pretty' tabulate table to ``<output_dir>/<file_name>.txt``.

    Args:
        df: DataFrame to dump; every row is written.
        file_name: Output file name without the ``.txt`` extension.
        output_dir: Folder the file is written to. Defaults to the project's
            Drive folder, so existing two-argument calls write where they did before.

    Returns:
        None. An existing file with the same name is overwritten.
    """
    file_path = f'{output_dir}/{file_name}.txt'
    columns = df.columns.tolist()
    # Render before opening the file, so a rendering error cannot leave a truncated file behind.
    table_text = tabulate(df, columns, tablefmt="pretty")
    # Explicit UTF-8 so non-ASCII text is written the same way on every platform.
    with open(file_path, 'w', encoding='utf-8') as txt_file:
        txt_file.write(table_text)

# 1st Model

In [None]:
'''
1st model: Wahal, (2004). Sorensen, (2007)

Used tables:
'df_investors',
'df_vico5_BNT'
'''

# Share of the ranked investor table kept as the "top" group.
TOP_INVESTOR_SHARE = 0.25

# Left join of the tables: keep every df_vico5_BNT row and attach the matching
# df_investors columns through InvestorID.
merged_investments_investors = pd.merge(df_vico5_BNT, df_investors, on='InvestorID', how='left')

print('-\n--\n-')

# CLEANING
# Keep only VCs: drop InvestorType 'BA' and 'Other' and every row whose
# InvestorID contains the text 'missing'.
df_Investments_merge_clean = (
    merged_investments_investors
    .loc[~((merged_investments_investors['InvestorType'] == 'BA') |
           (merged_investments_investors['InvestorType'] == 'Other') |
           # na=False: a null InvestorID yields False rather than NaN in the mask.
           (merged_investments_investors['InvestorID'].str.contains('missing', na=False)))]
)
df_Investments_merge_clean.info()
# Keep only the columns of interest
col_de_interes = ['InvestorID', 'InvestorName', 'InvestorNation', 'InvestorType']
df_Investments_merge_clean = df_Investments_merge_clean[col_de_interes]
# Check the result
print( '\n<o>\n df_Investments_merge_clean.info() \n' )
df_Investments_merge_clean.info()

# GROUPING
# One row per distinct investor with the number of rows it appears in,
# sorted from most to least frequent and re-indexed 0..n-1.
# observed=True: if any key is categorical (e.g. a value-labelled Stata column),
# only combinations that actually occur are kept; otherwise never-observed
# combinations would inflate the row count used for the top-share cutoff below.
# NOTE(review): groupby drops rows where any key is null (dropna defaults to True),
# so investors without a match in df_investors are excluded here; confirm intended.
print('----------AGRUPAMIENTOS----------')
df_Investments_merge_clean_collapsed = (
    df_Investments_merge_clean
    .groupby(col_de_interes, observed=True)
    .size()
    .reset_index(name='InvestorCount')
    .sort_values(by='InvestorCount', ascending=False)
    .reset_index(drop=True)
)
print( '\n<o>\n info df_Investments_merge_clean_collapsed.info() \n' )
df_Investments_merge_clean_collapsed.info()
print( '\n<o>\n table df_Investments_merge_clean_collapsed \n' )
akap_pretty_print_df(df_Investments_merge_clean_collapsed, 10)


# TRUNCATION
# Keep the first TOP_INVESTOR_SHARE of the ranked rows.
# NOTE(review): investors tied on InvestorCount at the cutoff are split arbitrarily.
print("\n<o>\n info df_Investments_merge_clean_collapsed conteo de apariciones TOP 25% \n")
df_Investments_merge_clean_collapsed_top25 = df_Investments_merge_clean_collapsed.head(
    round(len(df_Investments_merge_clean_collapsed) * TOP_INVESTOR_SHARE)
)
akap_pretty_print_df(df_Investments_merge_clean_collapsed_top25, 20)



In [None]:
# Rich display of the top-share investor table built in the previous cell
df_Investments_merge_clean_collapsed_top25

In [None]:
# Bar charts: number of rows of the top-share investor table per nation and per investor type.
for group_col in ('InvestorNation', 'InvestorType'):
    fig, ax = plt.subplots()
    (
        df_Investments_merge_clean_collapsed_top25
        .groupby(group_col)
        .size()
        .plot(kind='barh', color=sns.palettes.mpl_palette('Dark2'), ax=ax)
    )
    ax.set(title=f'Top investors by {group_col}', xlabel='Number of investors', ylabel=group_col)
    ax.spines[['top', 'right']].set_visible(False)
    plt.show()
    print('.')

# Line chart: InvestorCount along the table's row order
# (the table is sorted by InvestorCount, highest first, and indexed 0..n-1).
fig, ax = plt.subplots(figsize=(8, 4))
df_Investments_merge_clean_collapsed_top25['InvestorCount'].plot(kind='line', title='InvestorCount', ax=ax)
ax.set(xlabel='Investor rank (0 = highest InvestorCount)', ylabel='InvestorCount')
ax.spines[['top', 'right']].set_visible(False)
plt.show()
print('.')

# Model 4: Hochberg, Ljungqvist, and Lu (2007).

In [None]:
# Preview the first and last 5 rows of df_investments
akap_pretty_print_df(df_investments, 5)

# Dump the whole df_investments table to df_investments.txt in the Drive folder
akap_txt_file_writer(df_investments, 'df_investments')

In [None]:
# Check runtime vars
# dir() keeps track of the names declared in the runtime;
# names that start with an underscore are filtered out

[var for var in dir() if '_' != var[0]]