In [1]:
import yfinance as yf
import pandas as pd
import time

# Seconds to pause after every lookup (successful or not) to throttle requests.
REQUEST_DELAY_SECONDS = 1.5

# Placeholders recorded when a field is missing or a lookup fails.
DEFAULT_MARKET_CAP = 0
DEFAULT_LABEL = 'Unknown'

returns = pd.read_csv('returns.csv')
# The first column is skipped; every remaining column header is used as a ticker symbol.
tickers = list(returns.columns)[1:]

# One dict per ticker with keys Ticker / MarketCap / Sector / Country.
data = []

for ticker in tickers:
    try:
        info = yf.Ticker(ticker).info
        # `or` instead of a .get() default: the default must also apply when the
        # key is present but its value is None/empty, not only when it is absent.
        row = {
            'Ticker': ticker,
            'MarketCap': info.get('marketCap') or DEFAULT_MARKET_CAP,
            'Sector': info.get('sector') or DEFAULT_LABEL,
            'Country': info.get('country') or DEFAULT_LABEL,
        }
    except Exception as e:
        # Best effort: report the failure and still emit a placeholder row so
        # `data` keeps exactly one entry per ticker.
        print(f"Error retrieving {ticker}: {e}")
        row = {
            'Ticker': ticker,
            'MarketCap': DEFAULT_MARKET_CAP,
            'Sector': DEFAULT_LABEL,
            'Country': DEFAULT_LABEL,
        }
    data.append(row)
    # Pause on both the success and the failure path; previously a failed
    # lookup skipped the delay, so consecutive errors were sent back-to-back.
    time.sleep(REQUEST_DELAY_SECONDS)


In [5]:
# Build the per-ticker table and add 0/1 indicator columns for size, sector and
# country. Passing `columns` keeps the frame well-formed even when `data` is empty.
df = pd.DataFrame(data, columns=['Ticker', 'MarketCap', 'Sector', 'Country'])

# Size classification
# A market cap of 0 (or a missing value) is the placeholder written when the
# lookup failed or the field was not reported, so such rows get no size bucket
# instead of being counted as small caps.
market_cap = pd.to_numeric(df['MarketCap'], errors='coerce')
has_market_cap = market_cap > 0  # NaN compares False, so missing values are excluded too
df['SmallCap'] = (has_market_cap & (market_cap < 2e9)).astype(int)
df['MidCap'] = ((market_cap >= 2e9) & (market_cap < 1e10)).astype(int)
df['LargeCap'] = (market_cap >= 1e10).astype(int)

# Sector classification
# Each flag is a case-insensitive regex search on the sector label; a sector
# that matches none of the patterns simply gets 0 in every column.
sector_patterns = {
    'Tech': 'Technology',
    'Finance': 'Financial|Bank',
    'Healthcare': 'Health',
    'Consumer': 'Consumer',
    'Energy': 'Energy|Oil|Gas',
    'Industrial': 'Industrials',
    'Utilities': 'Utilities',
}
for flag_name, pattern in sector_patterns.items():
    df[flag_name] = df['Sector'].str.contains(pattern, case=False, na=False).astype(int)

# Country classification
# 'Unknown' / missing means the country could not be determined, so the row is
# flagged as neither international nor domestic rather than defaulting to international.
country = df['Country']
country_known = country.notna() & (country != 'Unknown')
df['International'] = (country_known & (country != 'United States')).astype(int)
df['Domestic'] = (country == 'United States').astype(int)



In [6]:
# Show the classified table as the cell's output (long frames are rendered truncated).
df

Unnamed: 0,Ticker,MarketCap,Sector,Country,SmallCap,MidCap,LargeCap,Tech,Finance,Healthcare,Consumer,Energy,Industrial,Utilities,International,Domestic
0,AAPL,2981930991616,Technology,United States,0,0,1,1,0,0,0,0,0,0,0,1
1,ABBV,329658269696,Healthcare,United States,0,0,1,0,0,1,0,0,0,0,0,1
2,ABT,234991501312,Healthcare,United States,0,0,1,0,0,1,0,0,0,0,0,1
3,ACN,195583049728,Technology,Ireland,0,0,1,1,0,0,0,0,0,0,1,0
4,ADBE,164393861120,Technology,United States,0,0,1,1,0,0,0,0,0,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,V,679162478592,Financial Services,United States,0,0,1,0,1,0,0,0,0,0,0,1
96,VZ,185578242048,Communication Services,United States,0,0,1,0,0,0,0,0,0,0,0,1
97,WFC,239393767424,Financial Services,United States,0,0,1,0,1,0,0,0,0,0,0,1
98,WMT,789207777280,Consumer Defensive,United States,0,0,1,0,0,0,1,0,0,0,0,1
