In [1]:
from IPython.core.interactiveshell import InteractiveShell
from IPython.display import display
InteractiveShell.ast_node_interactivity = "all"

In [2]:
%load_ext autoreload
%autoreload 2

In [3]:
import pandas as pd

In [4]:
%matplotlib inline
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)

In [5]:
SHEET_NAME_STOCK_INFO = "stock_info"
SHEET_NAME_STOCK_FINANCIALS = "stock_financials"
SHEET_NAME_STOCK_DIVIDENDS = "stock_dividends"

# Data Cleansing

In [14]:
stock_info_1 = pd.read_excel("alphalib_united_states_1.xlsx", sheet_name=SHEET_NAME_STOCK_INFO, engine="openpyxl")
stock_info_2 = pd.read_excel("alphalib_united_states_2.xlsx", sheet_name=SHEET_NAME_STOCK_INFO, engine="openpyxl")
print(len(stock_info_1.columns), len(stock_info_2.columns))

153 154


In [15]:
def create_missing_cols(df, target_cols):
    columns = df.columns.tolist()
    missing_cols = list(set(target_cols) - set(columns))
    df[missing_cols] = None

In [29]:
# Create missing columns for stock_info
print("Create missing columns")
if len(stock_info_1.columns) > len(stock_info_2.columns):
    stock_info_columns = stock_info_1.columns.tolist()
    stock_info_columns.sort()
    create_missing_cols(stock_info_2, stock_info_columns)
    stock_info_2 = stock_info_2[stock_info_columns]
    print(len(stock_info_1.columns), len(stock_info_2.columns))
elif len(stock_info_1.columns) < len(stock_info_2.columns):
    stock_info_columns = stock_info_2.columns.tolist()
    stock_info_columns.sort()
    create_missing_cols(stock_info_1, stock_info_columns)
    stock_info_1 = stock_info_1[stock_info_columns]
    print(len(stock_info_1.columns), len(stock_info_2.columns))
    
# Remove overlapped stocks
print("Remove overlapped stocks")
filter = ~stock_info_2["symbol"].isin(stock_info_1["symbol"].tolist()) 
print(len(stock_info_2[filter]))

# Merge the 2 data frames
stock_info_all = pd.concat([stock_info_1, stock_info_2], ignore_index=True)
print(len(stock_info_all))
stock_info_all.head(2)

Create missing columns
Remove overlapped stocks
316
1949


Unnamed: 0,52WeekChange,SandP52WeekChange,address1,address2,algorithm,annualHoldingsTurnover,annualReportExpenseRatio,ask,askSize,averageDailyVolume10Day,averageVolume,averageVolume10days,beta,beta3Year,bid,bidSize,bookValue,category,circulatingSupply,city,coinMarketCapLink,companyOfficers,country,currency,currentPrice,currentRatio,dateShortInterest,dayHigh,dayLow,debtToEquity,dividendRate,dividendYield,earningsGrowth,earningsQuarterlyGrowth,ebitda,ebitdaMargins,enterpriseToEbitda,enterpriseToRevenue,enterpriseValue,exDividendDate,exchange,exchangeTimezoneName,exchangeTimezoneShortName,expireDate,fiftyDayAverage,fiftyTwoWeekHigh,fiftyTwoWeekLow,financialCurrency,fiveYearAverageReturn,fiveYearAvgDividendYield,floatShares,forwardEps,forwardPE,freeCashflow,fromCurrency,fullTimeEmployees,fundFamily,fundInceptionDate,gmtOffSetMilliseconds,grossMargins,grossProfits,heldPercentInsiders,heldPercentInstitutions,impliedSharesOutstanding,industry,isEsgPopulated,lastCapGain,lastDividendDate,lastDividendValue,lastFiscalYearEnd,lastMarket,lastSplitDate,lastSplitFactor,legalType,logo_url,longBusinessSummary,longName,market,marketCap,maxAge,maxSupply,messageBoardId,morningStarOverallRating,morningStarRiskRating,mostRecentQuarter,navPrice,netIncomeToCommon,nextFiscalYearEnd,numberOfAnalystOpinions,open,openInterest,operatingCashflow,operatingMargins,payoutRatio,pegRatio,phone,preMarketPrice,previousClose,priceHint,priceToBook,priceToSalesTrailing12Months,profitMargins,quickRatio,quoteType,recommendationKey,recommendationMean,regularMarketDayHigh,regularMarketDayLow,regularMarketOpen,regularMarketPreviousClose,regularMarketPrice,regularMarketVolume,returnOnAssets,returnOnEquity,revenueGrowth,revenuePerShare,revenueQuarterlyGrowth,sector,sharesOutstanding,sharesPercentSharesOut,sharesShort,sharesShortPreviousMonthDate,sharesShortPriorMonth,shortName,shortPercentOfFloat,shortRatio,startDate,state,strikePrice,symbol,targetHighPrice,targetLowPrice,targetMeanPrice,targetMedianPrice,threeYearAverageReturn,toCurrency,totalAssets,totalCash,totalCashPerShare,totalDebt,totalRevenue,tradeable,trailingAnnualDividendRate,trailingAnnualDividendYield,trailingEps,trailingPE,twoHundredDayAverage,volume,volume24Hr,volumeAllCurrencies,website,yield,ytdReturn,zip
0,-0.295687,-0.125364,100 North Riverside Plaza,,,,,154.5,1000.0,5131960,8765122,5131960,1.382269,,153.8,1200.0,-25.087,,,Chicago,,[],United States,USD,153.66,1.25,1660522000.0,158.9,150.82,,,,-0.68,-0.671,119000000.0,0.00196,1194.715,2.341,142171100000.0,1581552000.0,NYQ,America/New_York,EDT,,155.6086,233.94,113.02,USD,,,548990300.0,4.17,36.848923,742500000.0,,142000.0,,,-14400000,0.05994,5190000000.0,0.00099,0.57868,0.0,Aerospace & Defense,False,,1581552000.0,2.055,1640909000.0,,865814400.0,2:1,,https://logo.clearbit.com/boeing.com,"The Boeing Company, together with its subsidia...",The Boeing Company,us_market,91245000000.0,1,,finmb_370857,,,1656547000.0,,-5278000000.0,1703981000.0,,158.0,,-2681000000.0,-0.03164,0.0,-4.11,312 544 2000,,160.25,2,,1.502148,-0.08689,0.283,EQUITY,none,,158.9,150.82,158.0,160.25,153.66,7554251,-0.00845,,-0.019,102.745,,Industrials,593811000.0,0.0176,10462365.0,1657843000.0,10263831.0,Boeing Company (The),0.0176,1.23,,IL,,BA,,,,,,,,11448000000.0,19.279,57200000000.0,60743000000.0,False,0.0,0.0,-8.285,,175.3606,7554251,,,https://www.boeing.com,,,60606-1596
1,-0.21016,-0.125364,300 Renaissance Center,,,,,38.7,800.0,15896830,14645014,15896830,1.284261,,38.37,800.0,42.636,,,Detroit,,[],United States,USD,38.56,1.147,1660522000.0,38.6,37.51,163.958,0.36,0.0093,-0.4,-0.403,16429000000.0,0.12437,9.204,1.145,151215400000.0,1661818000.0,NYQ,America/New_York,EDT,,35.7262,67.21,30.33,USD,,,1385292000.0,6.29,6.130366,-4729750000.0,,157000.0,,,-14400000,0.13551,18079000000.0,0.04867,0.83544,0.0,Auto Manufacturers,False,,1661818000.0,0.09,1640909000.0,,,,,https://logo.clearbit.com/gm.com,"General Motors Company designs, builds, and se...",General Motors Company,us_market,56222410000.0,1,,finmb_61206100,,,1656547000.0,,7723000000.0,1703981000.0,22.0,37.81,,11988000000.0,0.07927,0.0,1.93,313-667-1500,,38.21,2,0.9044,0.425602,0.06655,0.853,EQUITY,buy,2.0,38.6,37.51,37.81,38.21,38.56,10919251,0.02642,0.13627,0.047,90.76,,Consumer Cyclical,1458050000.0,0.0186,27103216.0,1657843000.0,31247870.0,General Motors Company,0.0186,2.13,,MI,,GM,95.0,29.0,52.11,47.5,,,,21534000000.0,14.769,112106000000.0,132101000000.0,False,0.0,0.0,5.26,,44.54245,10919251,,,https://www.gm.com,,,48265-3000


In [30]:
# Clean up stock_dividends

In [31]:
# Clean up stock_financials

In [33]:
# Save to new file