In [1]:
%matplotlib notebook

# Analysis of Top Investment Banks

## Research Questions to Answer:

* What is the market share of each of the top investment banks in terms of revenue and assets?
* How has the revenue and profitability of the top investment banks evolved over the past five years?
* What are the key drivers of revenue and profitability for investment banks?
* What is the geographic distribution of revenue and assets for the top investment banks?
* How do the top investment banks compare in terms of risk management and governance?

In [50]:
import pandas as pd
import numpy as np
import json
import requests
import pprint
import matplotlib.pyplot as plt

In [None]:
#1##################################################################################################

In [None]:
url = 'https://data.sec.gov/submissions/CIK'

In [None]:
banks_dictionary = { 'Goldman Sachs': '0000886982','JPMorgan': '0000019617',
                    'Morgan Stanley': '0000895421','BofA Securities': '0000070858','Jeffries':'0000096223',
                    'Citi': '0000831001','Barclays': '0000312070','Credit Suisse': '0001321482',
                    'RBC': '0000050916','Deutsche Bank': '0001159508'}
print(banks_dictionary)

In [None]:
 # Set the API base URL
url = 'https://data.sec.gov/submissions/CIK'

print("Beginning Data Retrieval     ")
print("-----------------------------")

bank_data = []

for bank in banks_dictionary:
    CIK = banks_dictionary[bank]
    query = url + CIK
    query_json = query + '.json'

    response = requests.get(query_json, headers={'user-agent':'Mozilla/5.0'})
    
    
    bank = response.json()['name']
    fiscal_year = response.json()['fiscalYearEnd']
    filings = response.json()['filings']
    
    bank_data.append({"Bank": bank, "Fiscal Year": fiscal_year})

print(bank_data)



In [None]:

bank_df = pd.DataFrame(bank_data)
bank_df

In [None]:
bank_df.to_csv("DATA/banks.csv", index_label="ID")

In [None]:
bank_df = pd.read_csv("DATA/banks.csv", index_col="ID")

In [None]:
bank_df

In [None]:
#2##################################################################################################

In [None]:
url = 'https://data.sec.gov/submissions/CIK'

In [None]:
CIK = '0000886982'

In [None]:
query = url + CIK
query_json = query + '.json'
print(query_json)

In [None]:
response = requests.get(query_json, headers={'user-agent':'Mozilla/5.0'})
json_response = response.json()

In [None]:
print(json_response)

In [None]:
with open('./test_json.json', 'w') as test_file:
    json.dump(json_response, test_file, indent=4, sort_keys=True)
 

In [None]:
print(json_response.keys())

In [None]:
#some of Adam's notes:

#pd.DataFrame([{'name':'doc1','a':'1'},{'name':'doc2','b':'2'},{'name':'doc3','a':'3'},])

# Script1: Loop through list of companies, call API, save data
#### OPTIONAL get other data with other scripts
# Script 2: Load data into data frame and clean
# Script3: Perform data  analysis

In [None]:
#3###############################################################################################
# run top cells then start from here 

In [150]:
bank_data_to_load = "DATA/investment_banking.csv"

bank_data_df = pd.read_csv(bank_data_to_load)


bank_data_df.head()

Unnamed: 0,Bank,Fiscal Year,Total IB fees,Market Share %,Net Interest Income,Non interest income,Total Revenue,Total non-interest expense,Net Income,Total Assets,Total Liabilities,IB fees as a % of net revenue,IB fees as a % of non-interest income,Unnamed: 13
0,Jefferies Financial Group Inc.,2021,"$4,365,699.00",,"$88,782.00","$8,096,547.00","$8,185,329.00","$5,759,721.00","$1,677,376.00","$60,404,110.00","$49,674,070.00",$0.53,$0.54,
1,Jefferies Financial Group Inc.,2020,"$2,501,494.00",,"$997,555.00","$5,013,319.00","$6,010,874.00","$4,783,438.00","$768,410.00","$53,118,352.00","$43,530,151.00",$0.42,$0.50,
2,Jefferies Financial Group Inc.,2019,"$1,526,992.00",,"$1,603,940.00","$2,289,036.00","$3,892,976.00","$3,530,186.00","$962,563.00","$49,460,234.00","$39,706,945.00",$0.39,$0.67,
3,Jefferies Financial Group Inc.,2018,"$1,904,870.00",,"$1,294,325.00","$2,469,709.00","$3,764,034.00","$3,435,708.00","$277,092.00","$47,131,095.00","$36,907,059.00",$0.51,$0.77,
4,Jefferies Financial Group Inc.,2017,"$1,764,285.00",,"$993,198.00","$3,084,247.00","$4,077,445.00","$3,294,840.00","$(35,784.00)","$47,169,108.00","$36,478,536.00",$0.43,$0.57,


In [151]:
bank_data_df["Total IB fees"].dtypes

dtype('O')

In [152]:
#To Do: Excel file numbers all measured in millions
# $ taken out in Excel  
# 

bank_data_df["Total IB fees"] = bank_data_df["Total IB fees"].astype(str).str.replace("\D+","",regex=True)
bank_data_df["Total IB fees"] = bank_data_df["Total IB fees"].map(lambda x: x.lstrip("$"))
bank_data_df["Total IB fees"] = pd.to_numeric(bank_data_df["Total IB fees"])

bank_data_df["Net Interest Income"] = bank_data_df["Net Interest Income"].astype(str).str.replace("\D+","",regex=True)
bank_data_df["Net Interest Income"] = bank_data_df["Net Interest Income"].map(lambda x: x.lstrip("$"))
bank_data_df["Net Interest Income"] = pd.to_numeric(bank_data_df["Net Interest Income"])

bank_data_df["Non interest income"] = bank_data_df["Non interest income"].astype(str).str.replace("\D+","",regex=True)
bank_data_df["Non interest income"] = bank_data_df["Non interest income"].map(lambda x: x.lstrip("$"))
bank_data_df["Non interest income"] = pd.to_numeric(bank_data_df["Non interest income"])

bank_data_df["Total Revenue"] = bank_data_df["Total Revenue"].astype(str).str.replace("\D+","",regex=True)
bank_data_df["Total Revenue"] = bank_data_df["Total Revenue"].map(lambda x: x.lstrip("$"))
bank_data_df["Total Revenue"] = pd.to_numeric(bank_data_df["Total Revenue"])

bank_data_df["Total non-interest expense"] = bank_data_df["Total non-interest expense"].astype(str).str.replace("\D+","",regex=True)
bank_data_df["Total non-interest expense"] = bank_data_df["Total non-interest expense"].map(lambda x: x.lstrip("$"))
bank_data_df["Total non-interest expense"] = pd.to_numeric(bank_data_df["Total non-interest expense"])

bank_data_df["Net Income"] = bank_data_df["Net Income"].astype(str).str.replace("\D+","",regex=True)
bank_data_df["Net Income"] = bank_data_df["Net Income"].map(lambda x: x.lstrip("$"))
bank_data_df["Net Income"] = pd.to_numeric(bank_data_df["Net Income"])


bank_data_df["Total Assets"] = bank_data_df["Total Assets"].astype(str).str.replace("\D+","",regex=True)
bank_data_df["Total Assets"] = bank_data_df["Total Assets"].map(lambda x: x.lstrip("$"))
bank_data_df["Total Assets"] = pd.to_numeric(bank_data_df["Total Assets"])

bank_data_df["Total Liabilities"] = bank_data_df["Total Liabilities"].astype(str).str.replace("\D+","",regex=True)
bank_data_df["Total Liabilities"] = bank_data_df["Total Liabilities"].map(lambda x: x.lstrip("$"))
bank_data_df["Total Liabilities"] = pd.to_numeric(bank_data_df["Total Liabilities"])

#seems to work now but keep this for a while
#bank_data_df['Fiscal Year'] = bank_data_df['Fiscal Year'].astype('int64')

bank_data_df

Unnamed: 0,Bank,Fiscal Year,Total IB fees,Market Share %,Net Interest Income,Non interest income,Total Revenue,Total non-interest expense,Net Income,Total Assets,Total Liabilities,IB fees as a % of net revenue,IB fees as a % of non-interest income,Unnamed: 13
0,Jefferies Financial Group Inc.,2021,436569900.0,,8878200.0,809654700.0,818532900,575972100,167737600.0,6040411000,4967407000,$0.53,$0.54,
1,Jefferies Financial Group Inc.,2020,250149400.0,,99755500.0,501331900.0,601087400,478343800,76841000.0,5311835200,4353015100,$0.42,$0.50,
2,Jefferies Financial Group Inc.,2019,152699200.0,,160394000.0,228903600.0,389297600,353018600,96256300.0,4946023400,3970694500,$0.39,$0.67,
3,Jefferies Financial Group Inc.,2018,190487000.0,,129432500.0,246970900.0,376403400,343570800,27709200.0,4713109500,3690705900,$0.51,$0.77,
4,Jefferies Financial Group Inc.,2017,176428500.0,,99319800.0,308424700.0,407744500,329484000,3578400.0,4716910800,3647853600,$0.43,$0.57,
5,CREDIT SUISSE,2021,1444006000.0,,637380700.0,1852035000.0,2489415400,2094000200,178348100.0,82903696400,78052319800,$0.58,$0.78,
6,CREDIT SUISSE,2020,1331648000.0,,668239500.0,1847096000.0,2515335400,2002696300,299516900.0,92008201300,87183911900,$0.53,$0.72,
7,CREDIT SUISSE,2019,1147588000.0,,721690800.0,1590764000.0,2312455000,1793685100,352257500.0,80972436500,76476499000,$0.50,$0.72,
8,CREDIT SUISSE,2018,1205271000.0,,710491600.0,1410137000.0,2120628500,1753978700,203852000.0,77943841900,72489600000,$0.57,$0.85,
9,CREDIT SUISSE,2017,1212871000.0,,672996000.0,1472134000.0,2145129800,1939546300,100892900.0,81729344100,77399158400,$0.57,$0.82,


In [154]:


jeff_plot_df = bank_data_df.loc[bank_data_df['Bank'] == 'Jefferies Financial Group Inc.',['Total IB fees']]
jeff_plot_df = jeff_plot_df.iloc[::-1]

suisse_plot_df = bank_data_df.loc[bank_data_df['Bank'] == 'CREDIT SUISSE',['Total IB fees']]
suisse_plot_df = suisse_plot_df.iloc[::-1]

citi_plot_df = bank_data_df.loc[bank_data_df['Bank'] == 'Citi Bank',['Total IB fees']]
citi_plot_df = citi_plot_df.iloc[::-1]

bar_plot_df = bank_data_df.loc[bank_data_df['Bank'] == "Barclay's",['Total IB fees']]
bar_plot_df = bar_plot_df.iloc[::-1]

gs_plot_df = bank_data_df.loc[bank_data_df['Bank'] == 'GS',['Total IB fees']]
gs_plot_df = gs_plot_df.iloc[::-1]

jp_plot_df = bank_data_df.loc[bank_data_df['Bank'] == 'JPM',['Total IB fees']]
jp_plot_df = jp_plot_df.iloc[::-1]

rbc_plot_df = bank_data_df.loc[bank_data_df['Bank'] == 'RBC Bank',['Total IB fees']]
rbc_plot_df = rbc_plot_df.iloc[::-1]

deutsche_plot_df = bank_data_df.loc[bank_data_df['Bank'] == 'Deutsche Bank',['Total IB fees']]
deutsche_plot_df = deutsche_plot_df.iloc[::-1]

bac_plot_df = bank_data_df.loc[bank_data_df['Bank'] == 'BANK OF AMERICA NA',['Total IB fees']]
bac_plot_df = bac_plot_df.iloc[::-1]

ms_plot_df = bank_data_df.loc[bank_data_df['Bank'] == 'Morgan Stanley',['Total IB fees']]
ms_plot_df = ms_plot_df.iloc[::-1]

In [157]:
x_axis = np.sort(bank_data_df['Fiscal Year'].unique(), axis=-1)

x_tick_locations = [value for value in x_axis]
plt.figure(figsize=(6,4))


jeff, = plt.plot(x_axis, jeff_plot_df['Total IB fees'], marker ='o', color='blue', label="Jefferies")
suisse, = plt.plot(x_axis, suisse_plot_df['Total IB fees'], marker ='o', color='green', label="CS")
citi, = plt.plot(x_axis, citi_plot_df['Total IB fees'], marker ='o', color='red', label="CitiBank")
bar, = plt.plot(x_axis, bar_plot_df['Total IB fees'], marker ='o', color='purple', label="Barclay's")
gs, = plt.plot(x_axis, gs_plot_df['Total IB fees'], marker ='o', color='darkblue', label="Goldman Sachs")
jp, = plt.plot(x_axis, jp_plot_df['Total IB fees'], marker ='o', color='pink', label="JP Morgan")
rbc, = plt.plot(x_axis, rbc_plot_df['Total IB fees'], marker ='o', color='yellow', label="Royal Bank of Canada")
d, = plt.plot(x_axis, deutsche_plot_df['Total IB fees'], marker='o', color='orange', label="Deutsche Bank")
bac, = plt.plot(x_axis, bac_plot_df['Total IB fees'], marker='o', color='darkred', label="Morgan Stanley")
ms, = plt.plot(x_axis, ms_plot_df['Total IB fees'], marker='o', color='darkgreen', label="Morgan Stanley")
#plt.xticks(x_tick_locations,1)
plt.title("Investment Banking Fees")
plt.xlabel("Fiscal Year")
plt.ylabel("Investment Banking Fees (millions)")
plt.show()

<IPython.core.display.Javascript object>

ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()

In [7]:
# Split up our data into groups based upon 'Home Library Definition' and 'Patron Type Definition'
banking_groups = bank_data_df.groupby(['Bank','Fiscal Year'])

# Create a new variable that holds the sum of our groups
new_banking_groups = banking_groups[['Total IB fees']]
new_banking_groups

<pandas.core.groupby.generic.DataFrameGroupBy object at 0x7fd438ba7610>