In [None]:
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt
import torch
import sklearn
import seaborn as sns
import os 

# import csv file

In [None]:
# Load the dataset: first row is the header, first column becomes the index and
# is parsed as dates. `squeeze=True` was dropped: the keyword was removed from
# read_csv in pandas 2.0 and had no effect on a multi-column file anyway.
df = pd.read_csv('/content/dataset.csv', header=0, index_col=0, parse_dates=True)


# Looking into data(df)

In [None]:
# Preview the first ten rows of the raw frame.
df.iloc[:10]

In [None]:
# Summary statistics of the numeric columns; left as the cell's last expression
# so the notebook renders it as a table instead of plain printed text.
df.describe()

# Working with NAN data

In [None]:
# Number of missing values in each column.
df.isnull().sum()

# first delete NAN data

In [None]:
# Drop every row that contains at least one NaN. The explicit .copy() makes
# `data` an independent frame, so the column assignments in later cells write
# to it directly instead of to a possible view of `df`.
data = df.dropna().copy()
data.head(10)

# Plot

In [None]:
# Distribution of the `ret_wrt_ind_fwd30d_log` column: normalised histogram
# with a KDE overlay. `sns.distplot` is deprecated, so the equivalent
# `histplot(..., kde=True, stat="density")` is used instead. Axis labels are set
# after plotting so seaborn does not overwrite them.
fig, ax = plt.subplots(figsize=(10, 10))
sns.histplot(data["ret_wrt_ind_fwd30d_log"], kde=True, stat="density", ax=ax)
ax.set_xlabel("ret_wrt_ind_fwd30d_log")
ax.set_ylabel("Density")
ax.grid()
plt.show()

## Adding MFI indicator
### (lowest price + highest price + closing price) / 3

In [None]:
import sys
import warnings

# Silence all warnings unless the interpreter was started with explicit
# warning options.
if len(sys.warnoptions) == 0:
    warnings.simplefilter("ignore")

# NOTE(review): despite the column name, this is (close + high + low) / 3 --
# the same expression a later cell calls the typical price -- not a money-flow
# index.
price_sum = data["adj_close_price"] + data["adj_max_price"] + data["adj_min_price"]
data["MFI"] = price_sum / 3

In [None]:
# Show only the first rows rather than dumping the whole frame.
data.head()

# Plot MFI column

In [None]:
# Line plot of the "MFI" column against the frame's index.
fig, ax = plt.subplots(figsize=(10, 10))
ax.grid()
ax.plot(data["MFI"])
plt.show()

In [None]:
# Histogram of the "MFI" column.
fig, ax = plt.subplots(figsize=(10, 10))
ax.grid()
ax.hist(data["MFI"], color="red")
plt.show()

In [None]:
# Adjusted close price over the `date` column.
close_ax = data.plot(x='date', y='adj_close_price', style="-")
close_ax.grid()
plt.show()

# Add average_day column

In [None]:
# Midpoint between the day's adjusted high and low.
day_high = data["adj_max_price"]
day_low = data["adj_min_price"]
data["average_day"] = (day_high + day_low) / 2
print(data["average_day"])

In [None]:
# Adjusted volume over the `date` column. DataFrame.plot opens its own figure,
# so a preceding plt.figure(figsize=...) only produced an extra empty figure and
# its size was ignored; the size is passed to DataFrame.plot directly instead.
volume_ax = data.plot(x='date', y='adj_volume', style="-", figsize=(15, 15))
volume_ax.grid()
plt.show()

In [None]:
# Date-sorted copy of the data, indexed by `date`, used to plot the close price.
spy = data.sort_values(by='date').set_index('date')
spy['adj_close_price'].plot(figsize=(16, 12))

# P = number of periods (20 commonly used)

# MA  = moving average, moving average = typical price / p

In [None]:
# Number of periods, reused by later cells.
p = 20
# NOTE(review): this divides each day's close by p; it is not a rolling mean
# over p periods, so "MA" is just a scaled copy of the close price.
data["MA"] = data["adj_close_price"].div(p)

# mean deviation = (typical price — MA) / p

In [None]:
# (close - MA) / p, stored as "mean-deviation" and plotted against the index.
# NOTE(review): no absolute value or rolling window is applied here.
close_minus_ma = data["adj_close_price"].sub(data["MA"])
data["mean-deviation"] = close_minus_ma.div(p)
plt.plot(data["mean-deviation"])
plt.show()

#Set the date as the index

In [None]:
# Re-index the frame by a DatetimeIndex built from the `date` column values.
date_index = pd.DatetimeIndex(data['date'].values)
data = data.set_index(date_index)
data.iloc[:10]

In [None]:
# Close price history, 12.2in wide by 4.5in high.
plt.figure(figsize=(12.2,4.5))
plt.plot(data['adj_close_price'], label='Close Price')
plt.title('Close Price History')
plt.xlabel('Date',fontsize=18)
plt.ylabel('Close Price USD ($)',fontsize=18)
# Use the line's own label. Passing df.columns.values labelled the single line
# with the first column name of the raw frame, which is unrelated to the plot.
plt.legend(loc='upper left')
plt.show()

#Calculate the typical price

In [None]:
# Typical price: (close + high + low) / 3 for each row.
typical_price = (
    data['adj_close_price']
    .add(data['adj_max_price'])
    .add(data['adj_min_price'])
    .div(3)
)
typical_price

# Get the time period.

In [None]:
# Look-back window (in rows) used by the rolling money-flow sums below and to
# trim the first rows when building new_df.
period =  14 #The typical period used for MFI is 14 days

In [None]:
# Raw money flow: typical price times adjusted volume, row by row.
money_flow = typical_price.mul(data['adj_volume'])
money_flow

In [None]:
# Split the raw money flow into positive and negative flows.
# For every day i >= 1:
#   * typical price rose vs. day i-1  -> day i's money flow counts as positive
#   * typical price fell vs. day i-1  -> day i's money flow counts as negative
#   * unchanged                       -> neither (0 in both lists)
# Both lists have len(typical_price) - 1 entries; entry k belongs to day k + 1.
#
# The money flow taken is the one of day i itself, the day being classified;
# the previous version appended the prior day's flow (index i-1). Working on
# plain NumPy arrays also avoids integer `[]` lookups on a date-indexed Series,
# which pandas deprecates as positional access.
tp_values = typical_price.to_numpy()
flow_values = money_flow.to_numpy()

tp_today, tp_yesterday = tp_values[1:], tp_values[:-1]
flow_today = flow_values[1:]

positive_flow = np.where(tp_today > tp_yesterday, flow_today, 0).tolist()
negative_flow = np.where(tp_today < tp_yesterday, flow_today, 0).tolist()

In [None]:
# Rolling sums of the positive and negative flows: one value per window of
# `period` consecutive entries, starting with the first full window.
positive_mf = [
    sum(positive_flow[end + 1 - period:end + 1])
    for end in range(period - 1, len(positive_flow))
]
negative_mf = [
    sum(negative_flow[end + 1 - period:end + 1])
    for end in range(period - 1, len(negative_flow))
]

In [None]:
# Money Flow Index: positive share of the total windowed flow, in percent.
pos_window = np.array(positive_mf)
neg_window = np.array(negative_mf)
mfi = 100 * (pos_window / (pos_window + neg_window))
mfi

In [None]:
# Plot the Money Flow Index with its reference levels.
df2 = pd.DataFrame({'MFI': mfi})

fig, ax = plt.subplots(figsize=(12.2, 4.5))  # width = 12.2in, height = 4.5in
ax.plot(df2['MFI'], label='MFI')
# 10 / 20 are the oversold (buy) lines, 80 / 90 the overbought (sell) lines.
for level, line_color in ((10, 'orange'), (20, 'blue'), (80, 'blue'), (90, 'orange')):
    ax.axhline(level, linestyle='--', color=line_color)
ax.set_title('MFI')
ax.set_ylabel('MFI Values', fontsize=18)
ax.legend(df2.columns.values, loc='upper left')
plt.show()

In [None]:
# Working frame: every row of `data` from position `period` onward, so its
# length matches `mfi`. The slice is taken positionally with .iloc and copied,
# so the columns added here and in later cells are written to an independent
# frame rather than to a slice of `data`.
new_df = data.iloc[period:].copy()
new_df['MFI'] = mfi
# Show the first rows of the new frame
new_df.head()

# Moving average of close_price by date

In [None]:
# 100-row rolling mean of the adjusted close; the first 99 values are NaN.
close_px = new_df['adj_close_price']
rolling_close = close_px.rolling(window=100)
mavg = rolling_close.mean()
mavg

# Plot AAPL and MAVG

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
from matplotlib import style

# Adjusting the size of matplotlib
import matplotlib as mpl
mpl.rc('figure', figsize=(10, 10))
mpl.__version__

# Adjusting the style of matplotlib
style.use('ggplot')

close_px.plot(label='AAPL')
mavg.plot(label='mavg')
plt.legend()

# **Return Deviation — to determine risk and return**
#Expected Return measures the mean, or expected value, of the probability distribution of investment returns. The expected return of a portfolio is calculated by multiplying the weight of each asset by its expected return and adding the values for each investment — Investopedia.


In [None]:
# Simple row-over-row return: close[t] / close[t-1] - 1 (first value is NaN).
previous_close = close_px.shift(1)
rets = close_px.div(previous_close).sub(1)
rets.plot(label='return')

# Adding CCI indicator
$$\mathrm{CCI} = \frac{TP - \mathrm{SMA}_{20}(TP)}{0.015 \times \text{Mean Deviation}}$$

where

- Typical Price (TP) = (HIGH + LOW + CLOSE) / 3
- SMA 20 of TP = (sum of TP over the last 20 periods) / 20
- Mean Deviation = (sum of |TP − SMA 20 of TP| over the last 20 periods) / 20

In [None]:
import numpy as np

# "CCI" column: (MFI - MFI / p) / (0.15 * mean-deviation).
# NOTE(review): this is built from the MFI column and the constant 0.15, not
# from the typical price, a rolling mean and 0.015 as in the usual CCI
# definition -- confirm whether that is intended.
cci_numerator = new_df["MFI"].sub(new_df["MFI"].div(p))
cci_denominator = new_df["mean-deviation"].mul(0.15)
new_df["CCI"] = cci_numerator.div(cci_denominator)


In [None]:
# Show only the first rows rather than dumping the whole frame.
new_df.head()

# **CCI Plot**
In this step, we are going to plot the extracted Commodity Channel Index values of **Hermes** to make more sense out of it. The main aim of this part is not on the coding section but instead to **observe the plot to gain a solid understanding of** the Commodity Channel Index.

In [None]:
# Two stacked panels: close price on top, CCI with its +/-150 bands below.
grid_shape = (10, 1)
ax1 = plt.subplot2grid(grid_shape, (0, 0), rowspan=5, colspan=1)
ax2 = plt.subplot2grid(grid_shape, (6, 0), rowspan=4, colspan=1)

ax1.plot(new_df['adj_close_price'])
ax1.set_title('Hermes SHARE PRICE')

ax2.plot(new_df['CCI'], color='orange')
ax2.set_title('Hermes CCI 14')
for band in (150, -150):
    ax2.axhline(band, linestyle='--', linewidth=1, color='black')
plt.show()

# **Creating the trading strategy**
#In this step, we are going to implement the discussed CCI trading strategy in python with the overbought and oversold levels as 150 and -150 respectively.

In [None]:
def implement_cci_strategy(prices, cci):
    """Derive buy/sell signals from CCI band crossings.

    A buy is emitted when the CCI moves from above the lower band (-150) to
    below it, a sell when it moves from below the upper band (150) to above
    it. A signal is only emitted when it differs from the last emitted one, so
    consecutive buys (or sells) are collapsed into the first.

    Parameters
    ----------
    prices : sequence of numbers (list, array or Series)
        Price for each row; read positionally.
    cci : sequence of numbers (list, array or Series)
        CCI value for each row; read positionally, same length as ``prices``.

    Returns
    -------
    buy_price, sell_price, cci_signal : list, list, list
        Three lists of ``len(prices)``. ``buy_price`` / ``sell_price`` hold the
        price on rows where a buy / sell fired and ``np.nan`` elsewhere;
        ``cci_signal`` holds 1 (buy), -1 (sell) or 0 (no signal).
    """
    lower_band = -150
    upper_band = 150

    # Work on plain arrays so indexing is always positional, whatever index a
    # Series argument carries.
    price_values = np.asarray(prices)
    cci_values = np.asarray(cci)

    buy_price = []
    sell_price = []
    cci_signal = []
    signal = 0  # last emitted signal: 1 = buy, -1 = sell, 0 = none yet

    for i in range(len(price_values)):
        crossing = 0
        # The first row has no predecessor, so it can never be a crossing.
        # (Indexing i-1 at i == 0 would wrap around to the last row.)
        if i > 0:
            previous, current = cci_values[i - 1], cci_values[i]
            if previous > lower_band and current < lower_band:
                crossing = 1
            elif previous < upper_band and current > upper_band:
                crossing = -1

        if crossing == 1 and signal != 1:
            buy_price.append(price_values[i])
            sell_price.append(np.nan)
            signal = 1
            cci_signal.append(signal)
        elif crossing == -1 and signal != -1:
            buy_price.append(np.nan)
            sell_price.append(price_values[i])
            signal = -1
            cci_signal.append(signal)
        else:
            buy_price.append(np.nan)
            sell_price.append(np.nan)
            cci_signal.append(0)

    return buy_price, sell_price, cci_signal

# Run the strategy on the adjusted close and the CCI column; each returned list
# has one entry per row of new_df.
buy_price, sell_price, cci_signal = implement_cci_strategy(new_df['adj_close_price'],new_df['CCI'])

# **Plotting the trading signals**
#In this step, we are going to plot the created trading lists to make sense out of them.

In [None]:
# Top panel: close price with buy/sell markers. Bottom panel: CCI with bands.
grid_shape = (10, 1)
ax1 = plt.subplot2grid(grid_shape, (0, 0), rowspan=5, colspan=1)
ax2 = plt.subplot2grid(grid_shape, (6, 0), rowspan=4, colspan=1)

ax1.plot(new_df['adj_close_price'], color='skyblue', label='FB')
marker_specs = (
    (buy_price, '^', 'BUY SIGNAL', 'green'),
    (sell_price, 'v', 'SELL SIGNAL', 'r'),
)
for marked_prices, marker_shape, marker_label, marker_color in marker_specs:
    ax1.plot(new_df.index, marked_prices, marker=marker_shape, markersize=12,
             linewidth=0, label=marker_label, color=marker_color)
ax1.set_title('Hermes SHARE PRICE')
ax1.legend()

ax2.plot(new_df['CCI'], color='orange')
ax2.set_title('Hermes CCI 14')
for band in (150, -150):
    ax2.axhline(band, linestyle='--', linewidth=1, color='black')
plt.show()

# **Creating our Position**
#In this step, we are going to create a list that indicates 1 if we hold the stock or 0 if we don’t own or hold the stock.

In [None]:
# Holding state per row: 1 after a buy signal, 0 after a sell signal, otherwise
# carried over from the previous row.
# NOTE(review): the running state starts at 1. The original seeding loop tested
# `signal > 1`, which never holds for signals in {-1, 0, 1}, so every slot began
# as 1 and the rows before the first signal count as "holding". Confirm that is
# intended.
held = 1
position = []
for row_signal in cci_signal:
    if row_signal == 1:
        held = 1
    elif row_signal == -1:
        held = 0
    position.append(held)

cci = new_df['CCI']
close_price = new_df['adj_close_price']
cci_signal = pd.DataFrame({'cci_signal': cci_signal}, index=new_df.index)
position = pd.DataFrame({'cci_position': position}, index=new_df.index)

# Side-by-side table of price, CCI, signal and position, aligned on the index.
frames = [close_price, cci, cci_signal, position]
strategy = pd.concat(frames, join='inner', axis=1)

strategy.head()


# **Backtesting**
#Before moving on, it is essential to know what backtesting is. Backtesting is the process of seeing how well our trading strategy has performed on the given stock data. In our case, we are going to implement a backtesting process for our CCI trading strategy over the Facebook stock data.

In [None]:
import requests
from termcolor import colored as cl

# Positional views of the inputs. Plain arrays are used because integer `[]`
# lookups on a date-indexed Series (e.g. series[i], series[-1]) are deprecated
# positional access in pandas.
close_values = new_df['adj_close_price'].to_numpy()
position_values = strategy['cci_position'].to_numpy()

# Row-over-row price change; one element fewer than the price series.
fb_ret = pd.DataFrame({'returns': np.diff(close_values)})

# Strategy return: the price change from row i to i+1, earned only when the
# position on row i is 1.
cci_strategy_ret = (fb_ret['returns'].to_numpy() * position_values[:len(fb_ret)]).tolist()
cci_strategy_ret_df = pd.DataFrame({'cci_returns': cci_strategy_ret})

investment_value = 100000
# Whole shares affordable at the last close in the frame.
number_of_stocks = np.floor(investment_value / close_values[-1])

cci_investment_ret = (number_of_stocks * cci_strategy_ret_df['cci_returns']).tolist()
cci_investment_ret_df = pd.DataFrame({'investment_returns': cci_investment_ret})

total_investment_ret = round(sum(cci_investment_ret_df['investment_returns']), 2)
profit_percentage = round((total_investment_ret/investment_value)*100, 2)
print(cl('Profit gained from the CCI strategy by investing $100k in FB : {}'.format(total_investment_ret), attrs = ['bold']))
print(cl('Profit percentage of the CCI strategy : {}%'.format(profit_percentage), attrs = ['bold']))

In [None]:
# Difference between the adjusted close and adjusted last price, per row.
ekhtelaf = new_df["adj_close_price"].sub(new_df["adj_last_price"])
plt.plot(ekhtelaf)

In [None]:
# First fifteen rows of the working frame.
new_df.iloc[:15]

# **Create binary class**

In [None]:
# Binary label: 1.0 where the 1-day log return, as a percentage of half the
# 30-day log return, exceeds 0.85, else 0.0.
# Computed for the whole column at once. The previous per-row chained
# assignment (new_df["class"][i] = ...) writes through an intermediate Series,
# which pandas warns about and which has no effect under copy-on-write.
# Rows where the ratio is NaN compare False and get 0.0, as before.
ratio_pct = (new_df["ret1d_log"] / (new_df["ret30d_log"] / 2)) * 100
new_df["class"] = (ratio_pct > 0.85).astype(float)

In [None]:
# Inspect the label column.
new_df.loc[:, "class"]

In [None]:
# Histogram of the two label values.
fig, ax = plt.subplots()
ax.hist(new_df["class"]);


In [None]:
# (rows, columns) of the working frame before the text columns are removed.
new_df.shape

In [None]:
# First three rows, before dropping columns.
new_df.iloc[:3]

In [None]:
# Remove the stock_name column. drop(..., errors="ignore") keeps the cell
# re-runnable: `del` raised KeyError once the column was already gone.
new_df = new_df.drop(columns=["stock_name"], errors="ignore")

In [None]:
# Remove the date column. drop(..., errors="ignore") keeps the cell
# re-runnable: `del` raised KeyError once the column was already gone.
new_df = new_df.drop(columns=["date"], errors="ignore")

In [None]:
# First three rows, after dropping columns.
new_df.iloc[:3]

# **Using LogisticRegression for classification**

In [None]:
from sklearn.model_selection import train_test_split

# Features are every column except the label, selected by name so the split
# does not depend on "class" happening to be the last column.
# NOTE(review): "class" is derived from ret1d_log and ret30d_log, which remain
# among the features, and the rows are shuffled although they are date-ordered.
# Confirm both are intended.
feature_frame = new_df.drop(columns=["class"])
train_x, test_x, train_lbl, test_lbl = train_test_split(
    feature_frame, new_df["class"], test_size=1/7.0, random_state=0
)

In [None]:
from sklearn.preprocessing import StandardScaler

# Learn the scaling statistics from the training set only ...
scaler = StandardScaler()
scaler.fit(X=train_x)
# ... then apply the same transform to both splits.
train_x, test_x = scaler.transform(train_x), scaler.transform(test_x)


In [None]:
from sklearn.linear_model import LogisticRegression

# Logistic regression with the L-BFGS solver, fitted on the scaled training set.
logisticRegr = LogisticRegression(solver="lbfgs")
logisticRegr.fit(X=train_x, y=train_lbl)

In [None]:
# Predict the first test row, passed as a 2-D array of shape (1, n_features).
logisticRegr.predict(test_x[[0]])

In [None]:
# Predict the first ten test rows.
logisticRegr.predict(test_x[:10])

# **Accuracy is not good**

In [None]:
# Mean accuracy of the logistic regression on the test split.
logisticRegr.score(X=test_x, y=test_lbl)

# **Decision Tree**

In [None]:
from sklearn.tree import DecisionTreeClassifier

# Depth-2 decision tree with a fixed seed.
tree_params = {"max_depth": 2, "random_state": 0}
clf = DecisionTreeClassifier(**tree_params)

In [None]:
# Fit the tree on the scaled training set.
clf.fit(X=train_x, y=train_lbl)

In [None]:
# Predict for 1 observation (the first test row, kept 2-D). The previous call
# passed ten rows here and discarded the result.
single_prediction = clf.predict(test_x[:1])
# Predict for multiple observations (the first ten test rows)
batch_prediction = clf.predict(test_x[:10])
# Show both results.
single_prediction, batch_prediction

# **Decision tree > Logistic Regression**

In [None]:
# Mean accuracy of the decision tree on the test split.
score = clf.score(X=test_x, y=test_lbl)
print(score)

In [None]:
# Candidate tree depths and the test accuracy obtained with each.
max_depth_range = [1, 2, 3, 4, 5]
accuracy = []
for depth in max_depth_range:
    # Refit from scratch per depth; `clf` ends up as the last (deepest) tree.
    clf = DecisionTreeClassifier(random_state=0, max_depth=depth).fit(train_x, train_lbl)
    score = clf.score(test_x, test_lbl)
    accuracy.append(score)

In [None]:
# Test accuracy against tree depth, labelled so the figure stands on its own.
fig, ax = plt.subplots()
ax.plot(max_depth_range, accuracy)
ax.set_xlabel('max_depth')
ax.set_ylabel('Test accuracy')
ax.set_title('Decision tree accuracy by depth')
plt.show()

# **KNN classifiers**

In [None]:
from sklearn.neighbors import KNeighborsClassifier

# Train/test accuracy for k = 1..4 neighbours.
neighbors = np.arange(1, 5)
train_accuracy = np.empty(neighbors.size)
test_accuracy = np.empty(neighbors.size)

for i in range(neighbors.size):
    k = neighbors[i]
    # Fit a fresh classifier per k; `knn` ends up as the last one (largest k).
    knn = KNeighborsClassifier(n_neighbors=k).fit(train_x, train_lbl)
    train_accuracy[i] = knn.score(train_x, train_lbl)
    test_accuracy[i] = knn.score(test_x, test_lbl)



In [None]:
# Accuracy curves for the k-NN sweep.
fig, ax = plt.subplots()
ax.set_title('k-NN Varying number of neighbors')
for curve, curve_label in ((test_accuracy, 'Testing Accuracy'),
                           (train_accuracy, 'Training accuracy')):
    ax.plot(neighbors, curve, label=curve_label)
ax.legend()
ax.set_xlabel('Number of neighbors')
ax.set_ylabel('Accuracy')
plt.show()


In [None]:
# Test accuracy of the last k-NN model fitted in the sweep.
knn.score(X=test_x, y=test_lbl)

In [None]:
# k-NN predictions for the whole test split.
y_pred = knn.predict(X=test_x)

In [None]:
from sklearn.metrics import classification_report

# Per-class precision / recall / F1 for the k-NN predictions.
knn_report = classification_report(y_true=test_lbl, y_pred=y_pred)
print(knn_report)

# **XGboost classifier**

In [None]:
from xgboost import XGBClassifier

# Gradient-boosted classifier with default hyper-parameters.
model = XGBClassifier()
model.fit(X=train_x, y=train_lbl)

In [None]:
# XGBoost predictions for the test split, each rounded to the nearest integer.
y_pred = model.predict(X=test_x)
predictions = list(map(round, y_pred))

In [None]:
from sklearn.metrics import accuracy_score

# Share of test rows the XGBoost model labelled correctly, printed in percent.
accuracy = accuracy_score(y_true=test_lbl, y_pred=predictions)
accuracy_pct = accuracy * 100.0
print("Accuracy: %.2f%%" % accuracy_pct)