In [1]:
%matplotlib notebook

# Analysis of Top Investment Banks

## Research Questions to Answer:

* What is the market share of each of the top investment banks in terms of revenue and assets?
* How has the revenue and profitability of the top investment banks evolved over the past five years?
* What are the key drivers of revenue and profitability for investment banks?
* What is the geographic distribution of revenue and assets for the top investment banks?
* How do the top investment banks compare in terms of risk management and governance?

In [20]:
import pandas as pd
import numpy as np
import json
import requests
import pprint
import matplotlib.pyplot as plt

In [None]:
#1##################################################################################################

In [None]:
url = 'https://data.sec.gov/submissions/CIK'

In [None]:
banks_dictionary = { 'Goldman Sachs': '0000886982','JPMorgan': '0000019617',
                    'Morgan Stanley': '0000895421','BofA Securities': '0000070858','Jeffries':'0000096223',
                    'Citi': '0000831001','Barclays': '0000312070','Credit Suisse': '0001321482',
                    'RBC': '0000050916','Deutsche Bank': '0001159508'}
print(banks_dictionary)

In [None]:
 # Set the API base URL
url = 'https://data.sec.gov/submissions/CIK'

print("Beginning Data Retrieval     ")
print("-----------------------------")

bank_data = []

for bank in banks_dictionary:
    CIK = banks_dictionary[bank]
    query = url + CIK
    query_json = query + '.json'

    response = requests.get(query_json, headers={'user-agent':'Mozilla/5.0'})
    
    
    bank = response.json()['name']
    fiscal_year = response.json()['fiscalYearEnd']
    filings = response.json()['filings']
    
    bank_data.append({"Bank": bank, "Fiscal Year": fiscal_year})

print(bank_data)



In [None]:

bank_df = pd.DataFrame(bank_data)
bank_df

In [None]:
bank_df.to_csv("DATA/banks.csv", index_label="ID")

In [None]:
bank_df = pd.read_csv("DATA/banks.csv", index_col="ID")

In [None]:
bank_df

In [None]:
#2##################################################################################################

In [None]:
url = 'https://data.sec.gov/submissions/CIK'

In [None]:
CIK = '0000886982'

In [None]:
query = url + CIK
query_json = query + '.json'
print(query_json)

In [None]:
response = requests.get(query_json, headers={'user-agent':'Mozilla/5.0'})
json_response = response.json()

In [None]:
print(json_response)

In [None]:
with open('./test_json.json', 'w') as test_file:
    json.dump(json_response, test_file, indent=4, sort_keys=True)
 

In [None]:
print(json_response.keys())

In [None]:
#some of Adam's notes:

#pd.DataFrame([{'name':'doc1','a':'1'},{'name':'doc2','b':'2'},{'name':'doc3','a':'3'},])

# Script1: Loop through list of companies, call API, save data
#### OPTIONAL get other data with other scripts
# Script 2: Load data into data frame and clean
# Script3: Perform data  analysis

In [None]:
#3###############################################################################################
# run top cells then start from here 

In [30]:
bank_data_to_load = "DATA/investment_banking.csv"

bank_data_df = pd.read_csv(bank_data_to_load)

#seems to work now but keep this for a while
#bank_data_df['Fiscal Year'] = bank_data_df['Fiscal Year'].astype('int64')

bank_data_df


Unnamed: 0,Bank,Fiscal Year,Total IB fees,Market Share %,Net Interest Income,Non interest income,Total Revenue,Total non-interest expense,Net Income,Total Assets,Total Liabilities,IB fees as a % of net revenue,IB fees as a % of non-interest income,Unnamed: 13
0,Jefferies Financial Group Inc.,2021,"$4,365,699.00",,"$88,782.00","$8,096,547.00","$8,185,329.00","$5,759,721.00","$1,677,376.00","$60,404,110.00","$49,674,070.00",$0.53,$0.54,
1,Jefferies Financial Group Inc.,2020,"$2,501,494.00",,"$997,555.00","$5,013,319.00","$6,010,874.00","$4,783,438.00","$768,410.00","$53,118,352.00","$43,530,151.00",$0.42,$0.50,
2,Jefferies Financial Group Inc.,2019,"$1,526,992.00",,"$1,603,940.00","$2,289,036.00","$3,892,976.00","$3,530,186.00","$962,563.00","$49,460,234.00","$39,706,945.00",$0.39,$0.67,
3,Jefferies Financial Group Inc.,2018,"$1,904,870.00",,"$1,294,325.00","$2,469,709.00","$3,764,034.00","$3,435,708.00","$277,092.00","$47,131,095.00","$36,907,059.00",$0.51,$0.77,
4,Jefferies Financial Group Inc.,2017,"$1,764,285.00",,"$993,198.00","$3,084,247.00","$4,077,445.00","$3,294,840.00","$(35,784.00)","$47,169,108.00","$36,478,536.00",$0.43,$0.57,
5,CREDIT SUISSE,2021,"$14,440,057.00",,"$6,373,807.00","$18,520,347.00","$24,894,154.00","$20,940,002.00","$(1,783,481.00)","$829,036,964.00","$780,523,198.00",$0.58,$0.78,
6,CREDIT SUISSE,2020,"$13,316,481.00",,"$6,682,395.00","$18,470,959.00","$25,153,354.00","$20,026,963.00","$2,995,169.00","$920,082,013.00","$871,839,119.00",$0.53,$0.72,
7,CREDIT SUISSE,2019,"$11,475,882.00",,"$7,216,908.00","$15,907,642.00","$23,124,550.00","$17,936,851.00","$3,522,575.00","$809,724,365.00","$764,764,990.00",$0.50,$0.72,
8,CREDIT SUISSE,2018,"$12,052,712.00",,"$7,104,916.00","$14,101,369.00","$21,206,285.00","$17,539,787.00","$2,038,520.00","$779,438,419.00","$724,896,000.00",$0.57,$0.85,
9,CREDIT SUISSE,2017,"$12,128,708.00",,"$6,729,960.00","$14,721,338.00","$21,451,298.00","$19,395,463.00","$(1,008,929.00)","$817,293,441.00","$773,991,584.00",$0.57,$0.82,


In [49]:
bank_data_df['Total IB fees'].dtypes

dtype('O')

In [48]:
x_axis = np.sort(bank_data_df['Fiscal Year'].unique(), axis=-1)
x_tick_locations = [value for value in x_axis]
plt.figure(figsize=(6,4))

# Each point on the sine chart is marked by a blue circle
jeff, = plt.plot(x_axis, bank_data_df['Total IB fees'], marker ='o', color='blue', label="Jefferies")
# Each point on the cosine chart is marked by a red triangle
#cosine_handle, = plt.plot(x_axis, cos, marker='^', color='red', label="Cosine")
#plt.plot(bank_data_df['Fiscal Year'],bank_data_df['Total IB fees'])

#plt.xticks(x_tick_locations,1)
plt.xlabel("Fiscal Year")
plt.ylabel("Investment Banking Fees (millions)")
plt.show()

<IPython.core.display.Javascript object>

TypeError: 'value' must be an instance of str or bytes, not a float

ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()

In [7]:
# Split up our data into groups based upon 'Home Library Definition' and 'Patron Type Definition'
banking_groups = bank_data_df.groupby(['Bank','Fiscal Year'])

# Create a new variable that holds the sum of our groups
new_banking_groups = banking_groups[['Total IB fees']]
new_banking_groups

<pandas.core.groupby.generic.DataFrameGroupBy object at 0x7fd438ba7610>