In [1]:
import numpy as np
import pandas as pd
from fbprophet import Prophet

In [2]:
df = pd.read_csv('stock.csv')

### remove unwanted columns

In [3]:
df = df.drop(['Unnamed: 0', 'open', 'high', 'low', 'volume', 'oi'], 1)

### rename columns as 'ds' and 'y' as Prophet requires this naming convention

In [4]:
df.columns = ['ds', 'y']

### remove the '+05:30' string from the end of dates

In [5]:
df['ds'] = df['ds'].str.replace(r'\+05:30', '')

### convert date column to datetime format

In [6]:
df['ds'] = df['ds'].apply(pd.to_datetime)

### create time series with the missing times

In [7]:
actual = pd.date_range("2021-04-27 13:11", "2021-05-27 16:39", freq="1min")

In [8]:
data = {'ds':actual}
complete_df = pd.DataFrame(data)

In [9]:
full = complete_df.merge(df, on='ds', how='left')

### filling missing times with the previous values

In [10]:
full['y'] = full['y'].fillna(method='ffill')

### train-test split

In [11]:
train = full[:int(0.8*len(full))]
test = full[int(0.8*len(full)):]

### Create Prophet model

In [12]:
m = Prophet()
m.fit(train)

INFO:numexpr.utils:NumExpr defaulting to 4 threads.
INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.


<fbprophet.forecaster.Prophet at 0x7efd1718ccc0>

In [13]:
future_df = test[['ds']]

In [14]:
final_df = pd.concat([test, future_df])

In [15]:
forecast = m.predict(future_df)
forecast = forecast[['ds', 'yhat']]
forecast.tail()

Unnamed: 0,ds,yhat
8677,2021-05-27 16:35:00,2120.32077
8678,2021-05-27 16:36:00,2120.18591
8679,2021-05-27 16:37:00,2120.051524
8680,2021-05-27 16:38:00,2119.917621
8681,2021-05-27 16:39:00,2119.784211


In [16]:
forecasted_final = test.merge(forecast)

In [17]:
forecasted_final.head()

Unnamed: 0,ds,y,yhat
0,2021-05-21 15:58:00,1260.0,735.132739
1,2021-05-21 15:59:00,1260.0,735.468814
2,2021-05-21 16:00:00,1260.0,735.804892
3,2021-05-21 16:01:00,1260.0,736.140978
4,2021-05-21 16:02:00,1260.0,736.477083


### Create future dataframe upto 2021-05-27 00:00

In [18]:
# future = pd.date_range("2021-05-27 16:40", "2021-05-27 23:59", freq="1min")

In [19]:
# data = {'ds':future}
# future_df = pd.DataFrame(data)

In [20]:
# final_df = pd.concat([df, future_df])

### Generate predictions

In [21]:
# forecast = m.predict(final_df)
# forecast = forecast[['ds', 'yhat']]
# forecast.tail()

In [22]:
# forecasted_final = final_df.merge(forecast)

In [23]:
# forecasted_final.plot(x='ds', figsize=(20,10))

### Calculate accuracy

In [24]:
forecasted_final['actual_up_trend'] = forecasted_final['y'].pct_change(5)
forecasted_final['foreasted_up_trend'] = forecasted_final['yhat'].pct_change(5)

In [30]:
forecasted_final = forecasted_final.fillna(0)

In [48]:
def f(row):
    ratio = 0.03
    if (row['actual_up_trend'] > ratio) and (row['foreasted_up_trend'] > ratio):
        val = 1
    elif (row['actual_up_trend'] < ratio) and (row['foreasted_up_trend'] > ratio):
        val = 0
    else:
        val = 2
    return val

### Counting the number of 1 and 0 values

In [49]:
forecasted_final['result'] = forecasted_final.apply(f, 1)

In [50]:
forecasted_final[forecasted_final['result'] == 0]

Unnamed: 0,ds,y,yhat,actual_up_trend,foreasted_up_trend,result


In [51]:
forecasted_final['result'].value_counts()

2    8682
Name: result, dtype: int64

In [43]:
341 / (393+341)

0.4645776566757493