In [1]:
import pandas as pd
from pathlib import Path
from statsmodels.tsa.seasonal import seasonal_decompose
%matplotlib inline

In [6]:
class MLgorithmic:
    
    def __init__(self):
        import pandas as pd
        from pathlib import Path
        from sklearn.model_selection import train_test_split
        
        
    def decompose(self, data, method='seasonal', model='Additive'):
        '''
        Split a time series into its components.

        params: data is a single series that can be used in the analysis. No
                period is passed to seasonal_decompose, so for the seasonal
                method statsmodels has to infer it from the data's index.
        params: method is 'seasonal' for seasonal decompose or 'hp' for
                Hodrick-Prescott.
        params: model is the Additive or Multiplicative model (any letter
                case); it is only used when method is 'seasonal'.
        returns: the seasonal_decompose result for 'seasonal', or the tuple
                 (ts_noise, ts_trend) for 'hp'.
        raises: ValueError when method is neither 'seasonal' nor 'hp'.
        '''
        # Validate first so a typo fails loudly instead of returning None.
        if method not in ('seasonal', 'hp'):
            raise ValueError(
                "method must be 'seasonal' or 'hp', got {!r}".format(method)
            )

        if method == 'seasonal':
            from statsmodels.tsa.seasonal import seasonal_decompose

            # statsmodels documents the lowercase names 'additive' and
            # 'multiplicative'; normalise so 'Multiplicative' is not
            # mistaken for the additive model.
            return seasonal_decompose(data, model=model.lower())

        import statsmodels.api as sm

        # hpfilter returns the cyclical (noise) component first, then the trend.
        ts_noise, ts_trend = sm.tsa.filters.hpfilter(data)
        return ts_noise, ts_trend
        
    
    def run_adfuller(self, data):
        '''
        Run the Augmented Dickey-Fuller test on data and return its result.

        The null hypothesis is that a unit root is present, i.e. the time
        series is non-stationary. The p value is element [1] of the result:
        p < 0.05 rejects the null (stationary series); otherwise the null is
        not rejected and the series is treated as non-stationary.
        '''
        import statsmodels.tsa.stattools as stattools

        return stattools.adfuller(data)
    
    def arma_arima(self, data, ar=1, ma=1, i=1, lags=0, model='arma', run_acf_pacf=False):
        '''
        Fit an ARMA or ARIMA model to data and return the fitted results.

        AR(p) makes predictions using previous values of the dependent
        variable, while the MA uses the mean and previous errors.
        Use the PACF plot to determine AR and the ACF plot to determine MA and i.

        params: data is the series to fit.
        params: ar and ma are the autoregressive and moving-average orders.
        params: i is the differencing order; only used when model is 'arima'.
        params: lags is only needed if run_acf_pacf is true and sets the lags
                parameter on the plots; 0 lets statsmodels choose.
        params: model is 'arma' or 'arima'.
        params: run_acf_pacf draws the ACF and PACF plots of data when true.
        returns: the results object produced by fit().
        raises: ValueError when model is neither 'arma' nor 'arima'.
        '''
        # An ARMA(p, q) model is an ARIMA(p, 0, q) model.
        if model == 'arma':
            order = (ar, 0, ma)
        elif model == 'arima':
            order = (ar, i, ma)
        else:
            raise ValueError(
                "model must be 'arma' or 'arima', got {!r}".format(model)
            )

        # The legacy statsmodels.tsa.arima_model classes were deprecated in
        # statsmodels 0.12 and no longer work from 0.13 on, so prefer the
        # current ARIMA class and only fall back on old installations.
        try:
            from statsmodels.tsa.arima.model import ARIMA
        except ImportError:
            ARIMA = None

        if ARIMA is not None:
            estimator = ARIMA(data, order=order)
        elif model == 'arma':
            from statsmodels.tsa.arima_model import ARMA
            estimator = ARMA(data, order=(ar, ma))
        else:
            from statsmodels.tsa.arima_model import ARIMA as LegacyARIMA
            estimator = LegacyARIMA(data, order=order)

        results = estimator.fit()

        if run_acf_pacf:
            from statsmodels.graphics.tsaplots import plot_acf, plot_pacf

            # lags=0 would draw only lag 0; None uses the library default.
            plot_lags = lags or None
            plot_acf(data, lags=plot_lags)
            plot_pacf(data, lags=plot_lags, zero=False)

        return results
    
    def arch_garch_model(self, data, start_date_for_forecast='', forecast_horizon=10, ar=1, ma=1, mean="Zero", vol="GARCH", run_forecast=False):
        '''
        Fit an ARCH/GARCH volatility model to data and optionally forecast.

        params: data is the series the model is fitted on.
        params: start date for forecast is a string date representation; the
                empty default leaves the start to the arch library.
        params: forecast_horizon is the number of steps to forecast, 10 by default.
        params: ar and ma are passed to arch_model as its p and q lag orders.
        params: mean and vol are passed straight to arch_model.
        params: run_forecast also produces a forecast when true.
        returns: the fitted result, or the tuple (result, forecasts) when
                 run_forecast is true.
        '''
        from arch import arch_model

        model = arch_model(data, mean=mean, vol=vol, p=ar, q=ma)
        res = model.fit(disp="off")

        if not run_forecast:
            return res

        # An empty string is not a usable start; pass None so the library
        # applies its own default starting point.
        forecasts = res.forecast(
            start=start_date_for_forecast or None,
            horizon=forecast_horizon,
        )
        return res, forecasts
    
    def runLinearRegression(self, data_x, data_y):
        '''
        Fit a linear regression of data_y on data_x and report in-sample metrics.

        Data may require shaping. data_x is [samples, features].

        returns: a tuple of 7 values
                 (coeff, intercept, score, r2, mse, rmse, std), where
                 coeff and intercept are the fitted parameters, score is
                 model.score on the same data, r2 / mse / rmse compare data_y
                 with the model's predictions for data_x, and std is the
                 standard deviation of data_y.
        '''
        import numpy as np
        from sklearn.linear_model import LinearRegression
        from sklearn.metrics import mean_squared_error, r2_score

        model = LinearRegression()
        model.fit(data_x, data_y)
        coeff = model.coef_
        intercept = model.intercept_

        # Predict on the data the model was fitted on (in-sample predictions).
        predicted_y_values = model.predict(data_x)

        score = model.score(data_x, data_y, sample_weight=None)
        r2 = r2_score(data_y, predicted_y_values)
        mse = mean_squared_error(data_y, predicted_y_values)
        rmse = np.sqrt(mse)
        # Spread of the target, useful as a yardstick for rmse.
        std = np.std(data_y)

        return coeff, intercept, score, r2, mse, rmse, std
    
    def split_test_and_train_data(self, X, y):
        '''
        Split X and y into training and testing sets.

        The split uses random_state=1 so it is reproducible, and stratify=y so
        the split is stratified on the labels in y.

        params: X is the feature data.
        params: y is the target labels, also used for stratification.
        returns: the tuple (X_train, X_test, y_train, y_test).
        '''
        # Imported locally so the name is guaranteed to be in scope here.
        from sklearn.model_selection import train_test_split

        X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1, stratify=y)

        return X_train, X_test, y_train, y_test
    
    def run_logistic_regression(self, data_x, data_y):
        '''
        Fit a logistic regression classifier and return it.

        The classifier is created with solver='lbfgs' and random_state=1.

        params: data_x is the feature data the classifier is fitted on.
        params: data_y is the target labels.
        returns: the fitted LogisticRegression classifier.
        '''
        from sklearn.linear_model import LogisticRegression

        classifier = LogisticRegression(solver='lbfgs', random_state=1)
        classifier.fit(data_x, data_y)
        return classifier
    
    def encode_data(self, data, columns_to_encode):
        '''
        Encode the given columns of data as indicator (dummy) columns.

        params: data is the DataFrame to encode.
        params: columns_to_encode is the list of column names handed to
                pd.get_dummies as its columns argument.
        returns: the encoded DataFrame produced by pd.get_dummies.
        '''
        X_binary_encoded = pd.get_dummies(data, columns=columns_to_encode)
        return X_binary_encoded