In [8]:
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
from sklearn import preprocessing
import numpy as np

def perform_PCA(X: np.ndarray) -> tuple[np.ndarray, np.ndarray]:
    scaler = preprocessing.StandardScaler()
    X = scaler.fit_transform(X)

    pca = PCA(n_components=2)
    pca.fit(X)  # Learn the projection matrix
    Z = pca.transform(X) # Project the given data with the learnt projection matrix
    
    PC1, PC2 = pca.components_ # Since n_components = 2
    PCs = np.vstack((PC1.reshape(1, -1), PC2.reshape(1, -1))) # Rows refer to each PC; Columns refer to each data attribute
    return Z, PCs


def perform_TSNE(X: np.ndarray, perplexity: int = 5) -> np.ndarray:
    scaler = preprocessing.StandardScaler()
    X = scaler.fit_transform(X)

    tsne = TSNE(n_components=2, perplexity=perplexity)
    Z = tsne.fit_transform(X)
    return Z


In [16]:
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
from datetime import datetime
from datetime import timedelta

from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.statespace.sarimax import SARIMAX
from statsmodels.tsa.stattools import adfuller
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from time import time

import seaborn as sns
sns.set(style="whitegrid")

import warnings
warnings.filterwarnings('ignore')

# NOTE: current directory: ECS273-Winter2023/Assignment/Vue-Flask-Template/dashboard

def apply_arima(data: pd.DataFrame, column: str, plot=False):
    arima = ARIMA(data[column], order=(12,1,1))
    predictions = arima.fit().predict()
    
    if plot:
        plt.figure(figsize=(16,4))
        plt.plot(data['Total'], label="Actual")
        plt.plot(predictions, label="Predicted")
        plt.title('Sales in 1000s of Units', fontsize=20)
        plt.ylabel('Sales', fontsize=16)
        plt.legend()
    return predictions

def apply_sarima(data: pd.DataFrame, column: str, plot=False):
    sarima = SARIMAX(data[column],
                order=(1,1,1),
                seasonal_order=(1,1,0,12))
    predictions = sarima.fit().predict()
    
    if plot:
        plt.figure(figsize=(16,4))
        plt.plot(data, label="Actual")
        plt.plot(predictions, label="Predicted")
        plt.title('Sales in 1000s of Units', fontsize=20)
        plt.ylabel('Sales', fontsize=16)
        plt.legend()
    return predictions

###################        Example     ################### 
# sales = prepare_time_template_data()
# arima_prediction = apply_arima(sales, "Total")
# sarima_prediction = apply_sarima(sales, "Total")
##########################################################

def _parser(s):
    return datetime.strptime(s, '%Y-%m-%d')

def prepare_time_template_data(plot=False) -> pd.DataFrame:
    sales = pd.read_csv('../server/data/time_template.csv', parse_dates=[0], index_col=0, date_parser=_parser)

    #plot
    if plot:
        plt.figure(figsize=(14,4))
        plt.plot(sales)
        plt.title('Sales in 1000s of Units', fontsize=20)
        plt.ylabel('Sales', fontsize=16)
        
        plt.rc('figure',figsize=(14,8))
        plt.rc('font',size=15)

        result = seasonal_decompose(sales,model='additive')
        fig = result.plot()
        
        plot_acf(sales['Total'], lags=48);
        plot_pacf(sales['Total'], lags=30);
    return sales




In [10]:
import pandas as pd
import numpy as np
from sklearn.datasets import load_wine
# from resources.hd_processing_template import perform_PCA, perform_TSNE
#from resources.network_process_template import contsruct_networkx
#from resources.text_processing_template import preprocess
#from resources.time_processing_template import prepare_time_template_data, apply_arima, apply_sarima
import matplotlib.pyplot as plt
import seaborn as sns
data=pd.read_excel('./Dry_Bean_Dataset.xlsx')
ans=[]
for i in data['Class'].unique():
    ans.append({'name':i,'value': (data['Class']==i).sum()  })
    
ans


[{'name': 'SEKER', 'value': 2027},
 {'name': 'BARBUNYA', 'value': 1322},
 {'name': 'BOMBAY', 'value': 522},
 {'name': 'CALI', 'value': 1630},
 {'name': 'HOROZ', 'value': 1928},
 {'name': 'SIRA', 'value': 2636},
 {'name': 'DERMASON', 'value': 3546}]

In [7]:
(data['Class']==i).sum()

3546

In [97]:
air_data['year']=air_data['Date'].dt.year
air_data['date']=air_data['Date'].dt.strftime('%m-%d')
points=air_data[['date','CO(GT)','year']]
points.drop(points[points['CO(GT)']=='-200.0'].index)
targets=air_data['year'].unique()
points[:150],type(targets)
# points.drop(points[points['CO(GT)']=='-200.0'].index), points.to_dict(orient='records'), list(targets)

(      date  CO(GT)  year
 0    03-10     2.6  2004
 1    03-10     2.0  2004
 2    03-10     2.2  2004
 3    03-10     2.2  2004
 4    03-10     1.6  2004
 ..     ...     ...   ...
 145  03-16     3.9  2004
 146  03-16     3.2  2004
 147  03-16     5.1  2004
 148  03-16     2.6  2004
 149  03-16     1.7  2004
 
 [150 rows x 3 columns],
 numpy.ndarray)

In [51]:
# test=air_data[['year','month','CO(GT)']].pivot("year","month","CO(GT)")

In [94]:
data: dict = load_wine()
X: np.ndarray = data.data
y: np.ndarray = data.target
#feat_names: np.ndarray = data.feature_names
target_names: np.ndarray = data.target_names

Z, _ = perform_PCA(X)
points = pd.DataFrame(Z, columns=['posX', 'posY'])
points['cluster'] = y
print(type(points),type(points['cluster'][0]),points['posX'][0], type(target_names))
points
# How to JSON serialize pandas dataframes and numpy arrays
# points.to_dict(orient='records'), list(target_names)

<class 'pandas.core.frame.DataFrame'> <class 'numpy.int64'> 3.3167508122147775 <class 'numpy.ndarray'>


Unnamed: 0,posX,posY,cluster
0,3.316751,-1.443463,0
1,2.209465,0.333393,0
2,2.516740,-1.031151,0
3,3.757066,-2.756372,0
4,1.008908,-0.869831,0
...,...,...,...
173,-3.370524,-2.216289,2
174,-2.601956,-1.757229,2
175,-2.677839,-2.760899,2
176,-2.387017,-2.297347,2
