In [26]:
import pandas as pd
import yfinance as yf
import requests
import re
import json


urls = {
    "Nasdaq": 'https://raw.githubusercontent.com/rreichel3/US-Stock-Symbols/main/nasdaq/nasdaq_full_tickers.json',
    "NYSE": 'https://raw.githubusercontent.com/rreichel3/US-Stock-Symbols/main/nyse/nyse_full_tickers.json',
    "AMEX": 'https://raw.githubusercontent.com/rreichel3/US-Stock-Symbols/main/amex/amex_full_tickers.json'
}

# Now urls is a dictionary with the keys "Nasdaq", "NYSE", and "AMEX" each associated with their respective URL
print(urls)

# Fetch the content of the JSON file
df = pd.DataFrame()

for key, url in urls.items():
    response = requests.get(url)
    df_tmp = pd.DataFrame(response.json())
    df_tmp['Exchange'] = key
    df = pd.concat([df, df_tmp], ignore_index = True)

# save a local copy
# df.to_csv('us_symbol_tickers.csv', index = False)
df.head()

{'Nasdaq': 'https://raw.githubusercontent.com/rreichel3/US-Stock-Symbols/main/nasdaq/nasdaq_full_tickers.json', 'NYSE': 'https://raw.githubusercontent.com/rreichel3/US-Stock-Symbols/main/nyse/nyse_full_tickers.json', 'AMEX': 'https://raw.githubusercontent.com/rreichel3/US-Stock-Symbols/main/amex/amex_full_tickers.json'}


Unnamed: 0,symbol,name,lastsale,netchange,pctchange,volume,marketCap,country,ipoyear,industry,sector,url,Exchange
0,AACG,ATA Creativity Global American Depositary Shares,$0.82,-0.0096,-1.157%,4725,26240107.0,China,2008.0,Other Consumer Services,Real Estate,/market-activity/stocks/aacg,Nasdaq
1,AACI,Armada Acquisition Corp. I Common Stock,$11.42,-0.03,-0.262%,12384,0.0,United States,2021.0,Blank Checks,Finance,/market-activity/stocks/aaci,Nasdaq
2,AACIU,Armada Acquisition Corp. I Unit,$11.43,0.0,0.00%,2,0.0,United States,2021.0,Blank Checks,Finance,/market-activity/stocks/aaciu,Nasdaq
3,AACIW,Armada Acquisition Corp. I Warrant,$0.15,0.0295,24.481%,200,0.0,United States,2021.0,Blank Checks,Finance,/market-activity/stocks/aaciw,Nasdaq
4,AADI,Aadi Bioscience Inc. Common Stock,$1.69,0.06,3.681%,292157,41496606.0,United States,,Biotechnology: Pharmaceutical Preparations,Health Care,/market-activity/stocks/aadi,Nasdaq


In [34]:
df['marketCap'] = pd.to_numeric(df['marketCap'], errors='coerce')
df['marketCap'] = df['marketCap'].fillna(0)
df['marketCap'] = df['marketCap'].astype(int)

df = df.sort_values(by=['marketCap'], ascending= False).reset_index(drop = True)
df['MarketCap_pct'] = df['marketCap'] / df['marketCap'].sum()
df['MarketCap_pct_cumsum'] = df['MarketCap_pct'].cumsum()
df_top_95pct = df[df['MarketCap_pct_cumsum'] <= 0.95]

In [50]:
sorted_industry_marketcap_sum = df.groupby(['industry'])['MarketCap_pct'].sum().reset_index(drop = False).sort_values(by='MarketCap_pct', ascending=False)

sorted_industry_marketcap_sum = sorted_industry_marketcap_sum[sorted_industry_marketcap_sum['MarketCap_pct']>.01]

import plotly.express as px

# fig = px.bar(sorted_industry_marketcap_sum, y='industry', x='MarketCap_pct', 
#              title='Market Cap Percentage by Industry (Top 95% Stocks)',
#              labels={'industry': 'Industry', 'MarketCap_pct': 'Market Cap Percentage'},
#              orientation='h',
#              height=600)

# # Update layout to ensure x-axis labels fit and bars are sorted
# fig.update_layout(
#     yaxis={'categoryorder':'total ascending'},  # Ensures bars are sorted by MarketCap_pct in descending order
#     xaxis_tickangle=-45,  # Adjusts x-axis label angle to fit long strings
#     margin=dict(l=450)  # Adds left margin to accommodate long industry names
# )

# # Show the plot
# fig.show()

In [51]:
sorted_industry_marketcap_sum

Unnamed: 0,industry,MarketCap_pct
134,Semiconductors,0.088766
37,Computer Software: Programming Data Processing,0.081261
36,Computer Software: Prepackaged Software,0.079918
21,Biotechnology: Pharmaceutical Preparations,0.051233
35,Computer Manufacturing,0.047849
0,,0.035137
75,Major Banks,0.03386
69,Industrial Machinery/Components,0.03154
30,Catalog/Specialty Distribution,0.030817
28,Business Services,0.028586


In [22]:
len(df['symbol'].str.strip().drop_duplicates())

7142

In [7]:
import requests
import pandas as pd
import json
from prettytable import PrettyTable
import streamlit as st
# from data import utils

In [8]:
@st.cache_data

def fetch_data(series_id):
    
    """Fetch observations for a FRED series and save to a DataFrame."""
    url = f"https://api.stlouisfed.org/fred/series/observations"
    
    api_key = "9648695895d3facac4fba9c7cb834427"
    
    params = {
        "series_id": series_id,
        "api_key": api_key,
        "file_type": "json"
    }
    response = requests.get(url, params=params)
    

    data = response.json()
    observations = data.get('observations', [])
    # Create DataFrame
    df = pd.DataFrame(observations)
    # Display the DataFrame
    
    return df



In [1]:
def get_gdp(freq, series_id): 

    # Define the URL and parameters
    url = 'http://dataservices.imf.org/REST/SDMX_JSON.svc/'

    param = [('dataset', 'IFS'),
            ('freq', freq),
            ('country', 'US'),
            ('series', series_id), 
            ('start', '?startPeriod=1920')]


    series = '.'.join([i[1] for i in param[1:4]])

    key = f'CompactData/{param[0][1]}/{series}{param[-1][1]}'

    # Combine API url with key specific to data request
    r = requests.get(f'{url}{key}').json()

    # Extract the data portion of results
    data = r['CompactData']['DataSet']['Series']

    # Check if data is a list or a single dictionary
    if isinstance(data, dict):
        data = [data]

    # Create pandas dataframe, column = country, row = obs
    df = pd.DataFrame({s['@REF_AREA']:
                    {i['@TIME_PERIOD']: float(i['@OBS_VALUE']) 
                        for i in s['Obs']} for s in data})

    # Convert index to datetime
    df.index = pd.to_datetime(df.index)
    
    return df

1.2 CPI and PPI

https://data.bls.gov/cgi-bin/surveymost?bls


In [2]:
# series_id = ['WPSFD4'] #'CUUR0000SA0', 'WPSFD4','EIUIR','EIUIQ','LNS14000000' # PPI, CPI, Import, Export, Unemployment
start_year = '1920'
end_year = '2024'

def get_bls_data(series_id, start_year, end_year): 
    
    headers = {'Content-type': 'application/json'}
    
    series_data = json.dumps({"seriesid": [series_id] ,"startyear" : start_year, "endyear" : end_year})  

    p = requests.post('https://api.bls.gov/publicAPI/v2/timeseries/data/', data = series_data, headers = headers)
    json_data = json.loads(p.text)
    for series in json_data['Results']['series']:
        x=prettytable.PrettyTable(["series id","year","period","value","footnotes"])
        seriesId = series['seriesID']
        for item in series['data']:
            year = item['year']
            period = item['period']
            value = item['value']
            footnotes=""
            for footnote in item['footnotes']:
                if footnote:
                    footnotes = footnotes + footnote['text'] + ','
            if 'M01' <= period <= 'M12':
                x.add_row([seriesId,year,period,value,footnotes[0:-1]])
        output = open(seriesId + '.txt','w')
        output.write (x.get_string())
        output.close()
        
    # Extract the relevant data
    series_data = json_data['Results']['series'][0]['data']
    # Convert to DataFrame

    df = pd.DataFrame(series_data)

    return df
    

In [3]:
# CPI
df_cpi = get_bls_data('WPSFD4', start_year, end_year)

# PPI
df_ppi = get_bls_data('CUUR0000SA0', start_year, end_year)

# unemployment rate
df_unemployment_rate = get_bls_data('LNS14000000', start_year, end_year) 

NameError: name 'json' is not defined

In [None]:
df_ppi

Unnamed: 0,year,period,periodName,value,footnotes
0,1929,M12,December,17.2,[{}]
1,1929,M11,November,17.3,[{}]
2,1929,M10,October,17.3,[{}]
3,1929,M09,September,17.3,[{}]
4,1929,M08,August,17.3,[{}]
...,...,...,...,...,...
115,1920,M05,May,20.6,[{}]
116,1920,M04,April,20.3,[{}]
117,1920,M03,March,19.7,[{}]
118,1920,M02,February,19.5,[{}]


In [None]:
series_id = 'NGDP_XDC' # Nominal GDP in national currency 
freq = 'A'

df_gdp = get_gdp(freq, series_id)

In [None]:

import pandas as pd
import math
from prettytable import PrettyTable
from ..data import indicators_data as indicators



# def df_gdp():
    
#     i = indicators.get_major_indicators()
    
#     #inputs
#     us_gdp_pct_change = i.gdp_pct_change
#     us_gdp = i.gdp
#     us_real_gdp = i.real_gdp
     
#     # format
#     df_gdp = pd.merge(us_gdp_pct_change, us_gdp, on = 'date', how = 'outer')
#     df_gdp = pd.merge(df_gdp, us_real_gdp, on = 'date', how = 'outer')
#     df_gdp = df_gdp.sort_values(by='date', ascending= True).reset_index(drop = True)
#     df_gdp['date'] = pd.to_datetime(df_gdp['date'])
#     # Function to convert date to quarter
#     def date_to_quarter(date):
#         return f"{date.year} Q{((date.month - 1) // 3) + 1}"
#     # Apply the function to the dataframe
#     df_gdp['Quarter'] = df_gdp['date'].apply(date_to_quarter)
#     df_gdp = df_gdp[df_gdp['date']>='1948-01-01']
#     # Convert 'GDP' and 'Real_GDP' columns to numeric, coercing errors
#     df_gdp['GDP'] = pd.to_numeric(df_gdp['GDP'], errors='coerce')
#     df_gdp['Real_GDP'] = pd.to_numeric(df_gdp['Real_GDP'], errors='coerce')

#     # Round up GDP values to integers
#     df_gdp['GDP'] = df_gdp['GDP'].apply(lambda x: math.ceil(x) if pd.notnull(x) else x)
#     df_gdp['Real_GDP'] = df_gdp['Real_GDP'].apply(lambda x: math.ceil(x) if pd.notnull(x) else x)
    
#     return df_gdp


# def df_cpi_ppi():
    
i = indicators.get_major_indicators()
i.get_ir_10yr()

ImportError: attempted relative import with no known parent package