In [None]:
import pandas as pd
import yfinance as yf
from datetime import date
import numpy as np
import plotly.graph_objects as go
import mplfinance as mpf
pd.set_option("display.max_rows", None, "display.max_columns", None)


In [None]:
# Notebook configuration: file-name prefixes of the input CSVs, the directory
# they live in, and the date suffix used in their file names.

output_files=[f"nasdaq_{size}" for size in ("mega","large","medium","small","micro","nano")]
save_dir="/home/thakur/test/combined/"

# Default date suffix is today's date formatted as MMDDYY ...
today=date.today().strftime("%m%d%y")
# ... but it is overridden here because the analysis is run after the fact.
today='091722'

print(f"Today: {today}\n")


In [None]:
def get_info(stock,par):
    """
    Look up one entry of a ticker's yfinance ``info`` mapping.

    Parameters
    ----------
    stock : ticker symbol handed to ``yf.Ticker``.
    par   : key to read from ``ticker.info``.

    Returns
    -------
    The value stored under ``par``. A progress line naming the ticker is
    printed before the lookup.
    """
    handle=yf.Ticker(stock)
    print(f"Working for {stock}")
    details=handle.info
    return details[par]


    

#mega['BETA']=mega['TICKER'].apply(lambda x:get_info(x,"beta"))

def change_index(df):
    """
    Renumber the rows of ``df`` as 1..len(df).

    The index is replaced in place on the object passed in, and that same
    object is returned so the call can be used in an assignment or chain.
    """
    row_count=len(df.index)
    df.index=np.arange(row_count)+1
    return df

def get_df_len(df):
    """
    Print and return the number of rows in ``df``.
    """
    n_rows=len(df.index)
    print(f"Length: {n_rows}")
    return n_rows
    

def get_all_info(st):
    """
    Return the full yfinance ``info`` mapping of ticker ``st`` as a DataFrame.

    Each key of the mapping becomes one row; the index is named ``INFO`` and
    the single column holding the values is named ``VALUE``.
    """
    raw_info=yf.Ticker(st).info
    table=pd.DataFrame.from_dict(raw_info,orient='index',columns=['VALUE'])
    return table.rename_axis('INFO')

#create yfinance portfolio sample
def yahoo_portfolio(df,save_name):
    """
    Write a Yahoo-portfolio-style template CSV for the tickers in ``df``.

    The ``Symbol`` column is filled from ``df['TICKER']``; every other
    template column is written empty.

    Parameters
    ----------
    df        : DataFrame with a ``TICKER`` column.
    save_name : output path without extension; ``".csv"`` is appended.

    Returns
    -------
    None. The file is written and a confirmation line is printed.
    """
    cols=['Symbol','Current Price','Date', 'Time', 'Change', 'Open', 'High', 'Low', 'Volume', 'Trade Date',
          'Purchase Price','Quantity','Commission','High Limit','Low Limit','Comment']

    # Start from one empty placeholder per template column, then fill Symbol.
    # Building the mapping from ``cols`` itself keeps the two in step: the
    # former hard-coded count of 14 placeholders was one short, so zip()
    # silently dropped the trailing 'Comment' column from the file.
    template=dict.fromkeys(cols)
    template['Symbol']=list(df['TICKER'])
    my_df=pd.DataFrame(template)
    # NOTE(review): the row index is still written as an unnamed first column,
    # as before — confirm whether the importing tool expects that.
    my_df.to_csv(save_name+".csv")
    print(f"Created the yahoo template: {save_name}.csv from data frame !")
    
def get_graph(tick):
    """
    Show a plotly line chart of the closing price of ticker ``tick``.

    History is requested from yfinance with ``period='52wk'`` and the
    ``Close`` column is plotted against the history index.
    """
    history=yf.Ticker(tick).history(period='52wk')
    close_line=go.Scatter(x=history.index,y=history['Close'], mode='lines')
    go.Figure(data=close_line).show()

def plot_with_mpl(tick,root,day=60,sty='yahoo',typ='candle'):
    """
    Display an mplfinance chart for ticker ``tick`` from its CSV file.

    Parameters
    ----------
    tick : ticker symbol; the file read is ``root + tick + ".csv"``.
    root : path prefix (directory including trailing separator) of the CSV.
    day  : number of trailing rows of the file to plot.
    sty  : mplfinance style name.
    typ  : mplfinance chart type.

    The chart carries 5- and 21-period moving averages and a volume panel;
    nothing is returned.
    """
    prices=pd.read_csv(root+tick+".csv").tail(day)
    prices.Volume=prices.Volume/10**5              # volume in units of 100k
    prices.Date=pd.to_datetime(prices.Date)
    prices=prices.set_index('Date')

    plot_options=dict(type=typ,mav=(5,21),volume=True,tight_layout=False,
                      figratio=(24,12),figscale=1,returnfig=True,style=sty)
    _fig,axes=mpf.plot(prices,**plot_options)

    # First returned axis receives the moving-average legend and the title.
    main_axis=axes[0]
    main_axis.legend(["5-SMA","21-SMA"],loc='upper center')
    main_axis.set_title(tick)
    mpf.show()
    

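## INFO: reference list of yfinance `Ticker.info` keys (presumably the values accepted as `par` by get_info; verify against the installed yfinance version)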
['zip','sector',
 'fullTimeEmployees',
 'longBusinessSummary',
 'city',
 'phone',
 'state',
 'country',
 'companyOfficers',
 'website',
 'maxAge',
 'address1',
 'industry',
 'ebitdaMargins',
 'profitMargins',
 'grossMargins',
 'operatingCashflow',
 'revenueGrowth',
 'operatingMargins',
 'ebitda',
 'targetLowPrice',
 'recommendationKey',
 'grossProfits',
 'freeCashflow',
 'targetMedianPrice',
 'currentPrice',
 'earningsGrowth',
 'currentRatio',
 'returnOnAssets',
 'numberOfAnalystOpinions',
 'targetMeanPrice',
 'debtToEquity',
 'returnOnEquity',
 'targetHighPrice',
 'totalCash',
 'totalDebt',
 'totalRevenue',
 'totalCashPerShare',
 'financialCurrency',
 'revenuePerShare',
 'quickRatio',
 'recommendationMean',
 'exchange',
 'shortName',
 'longName',
 'exchangeTimezoneName',
 'exchangeTimezoneShortName',
 'isEsgPopulated',
 'gmtOffSetMilliseconds',
 'quoteType',
 'symbol',
 'messageBoardId',
 'market',
 'annualHoldingsTurnover',
 'enterpriseToRevenue',
 'beta3Year',
 'enterpriseToEbitda',
 '52WeekChange',
 'morningStarRiskRating',
 'forwardEps',
 'revenueQuarterlyGrowth',
 'sharesOutstanding',
 'fundInceptionDate',
 'annualReportExpenseRatio',
 'totalAssets',
 'bookValue',
 'sharesShort',
 'sharesPercentSharesOut',
 'fundFamily',
 'lastFiscalYearEnd',
 'heldPercentInstitutions',
 'netIncomeToCommon',
 'trailingEps',
 'lastDividendValue',
 'SandP52WeekChange',
 'priceToBook',
 'heldPercentInsiders',
 'nextFiscalYearEnd',
 'yield',
 'mostRecentQuarter',
 'shortRatio',
 'sharesShortPreviousMonthDate',
 'floatShares',
 'beta',
 'enterpriseValue',
 'priceHint',
 'threeYearAverageReturn',
 'lastSplitDate',
 'lastSplitFactor',
 'legalType',
 'lastDividendDate',
 'morningStarOverallRating',
 'earningsQuarterlyGrowth',
 'priceToSalesTrailing12Months',
 'dateShortInterest',
 'pegRatio',
 'ytdReturn',
 'forwardPE',
 'lastCapGain',
 'shortPercentOfFloat',
 'sharesShortPriorMonth',
 'impliedSharesOutstanding',
 'category',
 'fiveYearAverageReturn',
 'previousClose',
 'regularMarketOpen',
 'twoHundredDayAverage',
 'trailingAnnualDividendYield',
 'payoutRatio',
 'volume24Hr',
 'regularMarketDayHigh',
 'navPrice',
 'averageDailyVolume10Day',
 'regularMarketPreviousClose',
 'fiftyDayAverage',
 'trailingAnnualDividendRate',
 'open',
 'toCurrency',
 'averageVolume10days',
 'expireDate',
 'algorithm',
 'dividendRate',
 'exDividendDate',
 'circulatingSupply',
 'startDate',
 'regularMarketDayLow',
 'currency',
 'trailingPE',
 'regularMarketVolume',
 'lastMarket',
 'maxSupply',
 'openInterest',
 'marketCap',
 'volumeAllCurrencies',
 'strikePrice',
 'averageVolume',
 'dayLow',
 'ask',
 'askSize',
 'volume',
 'fiftyTwoWeekHigh',
 'fromCurrency',
 'fiveYearAvgDividendYield',
 'fiftyTwoWeekLow',
 'bid',
 'tradeable',
 'dividendYield',
 'bidSize',
 'dayHigh',
 'coinMarketCapLink',
 'regularMarketPrice',
 'logo_url']

In [None]:
def HH_df(df):
    """
    Return the rows of ``df`` whose ``HH`` (higher high) flag equals True.
    """
    return df.loc[df['HH'].eq(True)]

def HL_df(df):
    """
    Return the rows of ``df`` whose ``HL`` (higher low) flag equals True.
    """
    return df.loc[df['HL'].eq(True)]

def HC_df(df):
    """
    Return the rows of ``df`` whose ``HC`` (higher close) flag equals True.
    """
    return df.loc[df['HC'].eq(True)]

def HHHL_df(df):
    """
    Return the rows of ``df`` where both the ``HH`` (higher high) and the
    ``HL`` (higher low) flags equal True.
    """
    higher_high=df['HH'].eq(True)
    higher_low=df['HL'].eq(True)
    return df.loc[higher_high & higher_low]

def HHHLHC_df(df):
    """
    Return the rows of ``df`` where the ``HH`` (higher high), ``HL``
    (higher low) and ``HC`` (higher close) flags all equal True.
    """
    higher_high=df['HH'].eq(True)
    higher_low=df['HL'].eq(True)
    higher_close=df['HC'].eq(True)
    return df.loc[higher_high & higher_low & higher_close]

def check_consolidation(st_file,days=10,threhold=2):
    """
    Report whether the closing prices in a CSV file are consolidating.

    Parameters
    ----------
    st_file  : path of a CSV file with a ``Close`` column.
    days     : number of trailing rows of the file to examine.
    threhold : allowed spread, in percent of the highest close.

    Returns
    -------
    True when the lowest close of the window is at least
    ``(1 - threhold/100)`` times the highest close, otherwise False.
    The verdict is also printed next to the file name.
    """
    recent_close=pd.read_csv(st_file).tail(days)['Close']
    lowest=recent_close.min()
    highest=recent_close.max()

    # Consolidating means the whole window stays within the percentage band
    # below its highest close.
    is_tight=bool(lowest>=(1-threhold/100)*highest)
    if not is_tight:
        print(f"{st_file:50}\tNot Consolidating..!")
        return False
    print(f"{st_file:50}Consolidating..!")
    return True
    
#modify the df cp>sma,order by cp, rsi1>50 rsi2<70
def modify_df(df,sma,rsi1=50,rsi2=70):
    """
    Filter, sort and renumber a screener DataFrame.

    Parameters
    ----------
    df   : DataFrame with ``CP`` and ``RSI`` columns plus the column named by ``sma``.
    sma  : name of the column that ``CP`` must exceed.
    rsi1 : exclusive lower bound for ``RSI``.
    rsi2 : exclusive upper bound for ``RSI``.

    Returns
    -------
    The matching rows sorted by ``CP`` ascending and renumbered from 1.
    The resulting row count is printed.
    """
    above_sma=df['CP']>df[sma]
    rsi_in_band=(df['RSI']>rsi1) & (df['RSI']<rsi2)
    selected=df.loc[above_sma & rsi_in_band].sort_values(by='CP')
    selected=change_index(selected)
    print(f"Total length: {len(selected.index)}")
    return selected

In [None]:
print(f"Working on the data on {today}")
# One CSV per entry of output_files, named <prefix><date>.csv under save_dir.
df_mega,df_large,df_medium,df_small,df_micro,df_nano=[
    pd.read_csv(f"{save_dir}{prefix}{today}.csv") for prefix in output_files
]


In [None]:
# Tag every row with the name of the frame it came from so the origin is
# still visible once the frames are concatenated.
for tier_frame,tier_name in zip(
        (df_mega,df_large,df_medium,df_small,df_micro,df_nano),
        ("mega","large","medium","small","micro","nano")):
    tier_frame['CATEGORY']=tier_name


# Combine all the dataframes

In [None]:
# Set ignore_nano=True to leave the nano frame out of the combined table.
ignore_nano=False
df_total=pd.concat([df_mega,df_large,df_medium,df_small,df_micro]+([] if ignore_nano else [df_nano]))

print(f"Total number: {len(df_total.index)}")
# Largest %CHG first; rows containing any NaN are dropped, the survivors are
# renumbered from 1 and rounded to two decimals.
df_inc=df_total.sort_values(by="%CHG",ascending=False).dropna()
df_inc.index=np.arange(len(df_inc.index))+1
df_inc=df_inc.round(2)
# Spot check of a single ticker.
aee=df_inc.loc[df_inc['TICKER']=='AEE']
aee

# group by $$HealthCare$$

In [None]:
# Health Care rows priced above price_cut whose HH, HC, HL and HV flags are
# all True and whose volume exceeds AVGVOL, listed by ascending price.
df_healthcare=df_inc.loc[df_inc["SECTOR"]=='Health Care']
price_cut=2
df_healthcare=df_healthcare.loc[
    (df_healthcare['CP']>price_cut)
    & df_healthcare['HH'].eq(True)
    & df_healthcare['HC'].eq(True)
    & df_healthcare['HL'].eq(True)
    & df_healthcare['HV'].eq(True)
    & (df_healthcare['VOL']>df_healthcare['AVGVOL'])
]
df_healthcare=df_healthcare.sort_values(by='CP').dropna()

df_healthcare=change_index(df_healthcare)
print(f"Total: {len(df_healthcare.index)}")
df_healthcare.head(20)

In [None]:
# Health Care rows passed through modify_df, using SMA5 as the column the
# price has to exceed.
df_test=df_inc.loc[df_inc["SECTOR"]=='Health Care']
df=modify_df(df_test,sma='SMA5')
df.head(5)

# ALL PENNY STOCKS PRICE < $ 10; 50<RSI<60; NANO; HH; HL; HC


In [None]:
# Screen over the whole df_inc table (the df_healthcare name is reused here):
# price below price_cut, 50 < RSI < 60, %CHG not infinite, HH/HC/HL/HV all
# True, volume above AVGVOL, and rows tagged 'nano' removed.
price_cut=10
rsi_f=(df_inc['RSI']>50) & (df_inc['RSI']<60)
pch=df_inc['%CHG'].ne(np.inf)
df_healthcare=df_inc.loc[
    (df_inc['CP']<price_cut)
    & df_inc['HH'].eq(True)
    & df_inc['HC'].eq(True)
    & df_inc['HL'].eq(True)
    & df_inc['HV'].eq(True)
    & (df_inc['VOL']>df_inc['AVGVOL'])
    & rsi_f
    & pch
]
df_healthcare=df_healthcare.loc[df_healthcare['CATEGORY'].ne('nano')]
df_healthcare=df_healthcare.sort_values(by='CP').dropna()

df_healthcare=change_index(df_healthcare)
print(f"Total: {len(df_healthcare.index)}")
df_healthcare.head(20)

# %CHG Decreasing !!!

In [None]:
# Combined table ordered from the smallest %CHG upward; rows with any NaN and
# rows whose CP is zero are removed before renumbering from 1.
df_dec=df_total.sort_values(by="%CHG").dropna()
df_dec=df_dec.loc[df_dec['CP'].ne(0.0)]
df_dec.index=np.arange(len(df_dec.index))+1
df_dec.head(20)

# Price greater than SMA-50 & SMA-200


In [None]:
# Rows priced above both SMA50 and SMA200, with the 'nano' category left out.
fil=(
    (df_total['CP']>df_total['SMA50'])
    & (df_total['CP']>df_total['SMA200'])
    & df_total['CATEGORY'].ne('nano')
)
gt=df_total.loc[fil].sort_values(by='CP')

print(f"Total with price above sma-50 and sma-200: {len(gt.index)}")
change_index(gt)
gt.head(20)

# FILTERS

In [None]:
# One boolean row mask per flag column of df_large, then the rows where all
# four flags are True.
hh_filter=df_large['HH'].eq(True)
hl_filter=df_large['HL'].eq(True)
hc_filter=df_large['HC'].eq(True)
hv_filter=df_large['HV'].eq(True)
df_high=df_large.loc[hh_filter & hl_filter & hc_filter & hv_filter]

# HH and HL test

In [None]:
# HH and HL rows of df_mega with RSI above 50 and price above SMA5,
# ordered by price.
mega=HHHL_df(df_mega)
mega=mega.loc[(mega['RSI']>50) & (mega['CP']>mega['SMA5'])].sort_values(by='CP')
print(f"Total mega: {len(mega.index)}")

mega=change_index(mega)
mega

# HH, HL, HC, RSI>50, CP>SMA5

In [None]:
# HH, HL and HC rows of df_large with RSI above 50, price above SMA5 and a
# %CHG that is not infinite, ordered by price.
large=HHHLHC_df(df_large)
large=large.loc[
    (large['RSI']>50)
    & (large['CP']>large['SMA5'])
    & large['%CHG'].ne(np.inf)
].sort_values(by='CP')

print(f"Total large: {len(large.index)}")
large=change_index(large)
large.head(20)


In [None]:
# HH, HL and HC rows of df_medium with RSI above 50 and price above SMA5,
# ordered by price and renumbered from 1.
medium=HHHLHC_df(df_medium)
medium=medium.loc[(medium['RSI']>50) & (medium['CP']>medium['SMA5'])]
medium=change_index(medium.sort_values(by='CP'))
print(f"Total medium: {len(medium.index)}")
medium.head(20)

In [None]:
# HH, HL and HC rows of df_small, renumbered from 1.
small=change_index(HHHLHC_df(df_small))
print(f"Total small: {len(small.index)}")
small.head(5)

In [None]:
# HH, HL and HC rows of df_micro, renumbered from 1.
micro=change_index(HHHLHC_df(df_micro))
print(f"Total micro: {len(micro.index)}")
micro.head(5)

In [None]:
# HH, HL and HC rows of df_nano, renumbered from 1.
nano=change_index(HHHLHC_df(df_nano))
print(f"Total nano: {len(nano.index)}")
nano.head(5)

# Combine HH and HL data frames

In [None]:
# Stack the per-frame selections into one table. All six frames are included,
# nano among them; drop it from the list to exclude it.
combined=[mega,large,medium,small,micro,nano]
df_c=pd.concat(combined)

# Successive row filters: positive price, RSI below 80, price above both
# SMA50 and SMA200, %CHG not infinite, HV flag True and non-zero volume.
df_c=df_c[df_c['CP']>0.0]
df_c=df_c[df_c['RSI']<80]
df_c=df_c[df_c['CP']>df_c['SMA50']]
df_c=df_c[df_c['CP']>df_c['SMA200']]
df_c=df_c[df_c['%CHG']!=np.inf]
df_c=df_c[df_c['HV']==True]
df_c=df_c[df_c['VOL']>0.0]
df_c=df_c.sort_values(by=['CP'])

# Label missing values as "Others". np.nan is spelled in lower case because
# the np.NaN alias no longer exists in NumPy 2.0; the earlier fillna() call
# discarded its result and is covered by this replace.
df_c=df_c.replace(np.nan,'Others')
change_index(df_c)
print(f"Total count {len(df_c.index)}")
# Round only what is displayed (the bare round() call discarded its result);
# df_c itself keeps full precision for the cells that follow.
df_c.round(2).head(20)



In [None]:
# Print ticker, price and category of the df_c rows, one section per distinct
# SECTOR value in order of first appearance.
for sector in df_c['SECTOR'].unique():
    print(f"\n      === {sector.upper()} ===\n")
    sector_rows=df_c.loc[df_c['SECTOR']==sector,['TICKER','CP','CATEGORY']]
    print(sector_rows)
    print()

In [None]:
# Write df_c without its index to nasdaq<date>.csv in the working directory.
save_file=f"nasdaq{today}.csv"
print(f"save file: {save_file}")
df_c.to_csv(save_file,index=False)

# ATR order

In [None]:
# NOTE(review): df_c itself is narrowed to CP < 10 here, so every later cell
# that reads df_c only sees those rows — confirm that this is intended.
df_c=df_c.loc[df_c['CP']<10]
# Highest ATR first, renumbered from 1.
df_a=change_index(df_c.sort_values(by='ATR',ascending=False))
print(f"Total count {len(df_a.index)}")
df_a.head(20)

# RSI SORT

In [None]:
# Highest RSI first; rows whose CP is zero are removed before renumbering.
df_c=df_c.sort_values(by="RSI",ascending=False)
df_c=change_index(df_c.loc[df_c['CP'].ne(0.0)])
print(f"Total Length: {len(df_c.index)}")
df_c.head(20)

In [None]:

# Show the last ten rows of df_c in its current order.
df_c.tail(10)

# More Filters

In [None]:
# Boolean row masks over df_c, combined in the cells further down.
rsi=df_c['RSI'].gt(50)               # RSI above 50
rsi0=df_c['RSI'].lt(70)              # RSI below 70
p50=df_c['CP']>df_c['SMA50']         # price above SMA50
p200=df_c['CP']>df_c['SMA200']       # price above SMA200
hc=df_c['HC'].eq(True)               # higher close
vol=df_c['VOL']>df_c['AVGVOL']       # volume above AVGVOL
#vol=((df_c['VOL']>df_c['AVGVOL']) & (df_c['AVGVOL']>1))
#ab=(df_c['SMA200']<df_c['SMA50']) & (df_c['SMA50']<df_c['SMA21'])


# Criterion
- **50 < RSI < 70**
- **CP>SMA50>SMA200**
- **SMA21>SMA50>SMA200**
- **HC**
- **VOL>VOL(10)**

In [None]:
# Rows passing the RSI band, the SMA50/SMA200 price masks and the volume mask
# (the hc mask is not applied), ordered by price and renumbered from 1.
df_co=df_c.loc[rsi & rsi0 & p50 & vol & p200].sort_values(by='CP')
change_index(df_co)
get_df_len(df_co)

df_co.round(2)

#save in the file
#yahoo_portfolio(df_co,"july08")

In [None]:
# for i in list(df_c['TICKER']):
#     get_graph(i)

In [None]:
# Split df_c at a price of 5: rows below it go to df_penny, all remaining
# rows go to df_others.
f_penny=df_c['CP'].lt(5)
df_penny=df_c.loc[f_penny]
df_others=df_c.loc[~f_penny]


In [None]:
# First twenty df_penny rows by ascending price.
df_penny.sort_values(by='CP').head(20)

In [None]:
# First twenty df_others rows by ascending price.
df_others.sort_values(by='CP').head(20)

# S&P 500 

In [None]:
# Read the S&P symbol file and keep its Symbol column as a plain list.
snp_csv=pd.read_csv("/home/thakur/stock_information/snplist.csv")
snp_list=snp_csv['Symbol'].tolist()
snp_list[:10]


In [None]:
# Rows of the combined table whose ticker is in snp_list, largest %CHG first,
# renumbered from 1.
snp_df=df_total.loc[df_total['TICKER'].isin(snp_list)].sort_values(by='%CHG',ascending=False)
snp_df.index=np.arange(len(snp_df.index))+1
snp_df.head(20)

# SAVE S&P

In [None]:
# snp_df rows with HH, HL and HC all True, 50 < RSI < 70, price above SMA50
# and SMA50 above SMA200, ordered by price.
snp_fil=snp_df.loc[
    snp_df['HH'].eq(True)
    & snp_df['HL'].eq(True)
    & snp_df['HC'].eq(True)
    & (snp_df['RSI']>50)
    & (snp_df['RSI']<70)
    & (snp_df['CP']>snp_df['SMA50'])
    & (snp_df['SMA50']>snp_df['SMA200'])
].sort_values(by='CP')
change_index(snp_fil)

In [None]:
# #tests
# up20=snp_df.sort_values(by=['%CHG'],ascending=False)
# change_index(up20)
# up20.head(20)

In [None]:
# Read the Dow Jones symbol file and keep its Symbol column as a plain list.
dow_csv=pd.read_csv("/home/thakur/stock_information/dow.csv")
dow_list=dow_csv['Symbol'].tolist()
dow_list[:10]


In [None]:
# Rows of the combined table whose ticker is in dow_list, largest %CHG first,
# renumbered from 1.
dow_df=df_total.loc[df_total['TICKER'].isin(dow_list)].sort_values(by='%CHG',ascending=False)
dow_df.index=np.arange(len(dow_df.index))+1
dow_df

In [None]:
# dow_df rows with HH, HL and HC all True and RSI above 50.
dow_fl=dow_df.loc[
    dow_df['HH'].eq(True)
    & dow_df['HL'].eq(True)
    & dow_df['HC'].eq(True)
    & (dow_df['RSI']>50)
]
change_index(dow_fl)

In [None]:


#hh and hl in the yahoo compatible file
#yahoo_portfolio(df_c,save_dir+today+"hhhl")

In [None]:
#apply rsi and price>50 sma
# df_1=df_c[rsi & p50]
# yahoo_portfolio(df_1,"df1_test")

In [None]:
# df_c['BETA']=df_c['TICKER'].apply(lambda x:get_info(x,"beta"))
# df_c.round(2)

In [None]:
#df_c.round(2)

In [None]:
# Replace df_c with the first five rows that pass the rsi, p50 and vol masks,
# then show them rounded to two decimals.
df_c=df_c.loc[rsi & p50 & vol].head(5)
df_c.round(2)