# In [40]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import Normalizer
from scipy.linalg import norm
from scipy.spatial.distance import cdist
from sklearn.decomposition import PCA
import csv
from sklearn.cluster import KMeans

class FCM:
    """Fuzzy C-Means clustering.

    Every sample receives a degree of membership in each cluster instead of
    a single hard label. Fitting alternates between recomputing the cluster
    centres from the current memberships and recomputing the memberships
    from the current centres, until the membership matrix stops changing or
    ``max_iter`` is reached.

    Parameters
    ----------
    n_clusters : int
        Number of clusters.
    max_iter : int
        Upper bound on the number of centre/membership update rounds.
    m : float
        Fuzzifier exponent; must be greater than 1.
    error : float
        ``fit`` stops once the norm of the change in the membership matrix
        between two consecutive rounds drops below this value.
    random_state : int
        Seed for the random initial membership matrix.

    Attributes
    ----------
    u : ndarray of shape (n_samples, n_clusters) or None
        Membership matrix from the last ``fit``; each row sums to 1.
    centers : ndarray of shape (n_clusters, n_features) or None
        Cluster centres from the last ``fit``.
    """

    def __init__(self, n_clusters=10, max_iter=150, m=2, error=1e-5, random_state=42):
        # The membership update uses the exponent 2 / (m - 1): m == 1 would
        # divide by zero and m < 1 would invert the distance weighting.
        if m <= 1:
            raise ValueError("m must be greater than 1, got {!r}".format(m))
        self.u, self.centers = None, None
        self.n_clusters = n_clusters
        self.max_iter = max_iter
        self.m = m
        self.error = error
        self.random_state = random_state

    def fit(self, X):
        """Estimate cluster centres and memberships for ``X``.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)

        Returns
        -------
        FCM
            The fitted estimator itself (``self``).
        """
        X = np.asarray(X, dtype=float)
        self.n_samples = X.shape[0]

        # Random initial memberships, normalised so every row sums to 1.
        rng = np.random.RandomState(self.random_state)
        u = rng.rand(self.n_samples, self.n_clusters)
        self.u = u / u.sum(axis=1, keepdims=True)

        for _ in range(self.max_iter):
            u_old = self.u.copy()
            self.centers = self.next_centers(X)
            self.u = self._predict(X)
            # Stopping rule: memberships have (numerically) converged.
            if norm(self.u - u_old) < self.error:
                break
        return self

    def next_centers(self, X):
        """Return the centres implied by the current memberships.

        Each centre is the mean of the samples weighted by ``u ** m``.
        The result has shape (n_clusters, n_features).
        """
        um = self.u ** self.m
        return (X.T @ um / np.sum(um, axis=0)).T

    def _predict(self, X):
        """Return the membership matrix of ``X`` w.r.t. ``self.centers``.

        ``u[i, j] = 1 / sum_k (d_ij / d_ik) ** (2 / (m - 1))`` where ``d``
        is the Euclidean sample-to-centre distance.  A sample that lies
        exactly on one or more centres would make that expression 0/0; such
        a sample gets its whole membership split evenly over the centres it
        coincides with.
        """
        power = float(2 / (self.m - 1))
        temp = cdist(X, self.centers) ** power

        # ratios[i, j, k] = temp[i, j] / temp[i, k]
        with np.errstate(divide="ignore", invalid="ignore"):
            ratios = temp[:, :, np.newaxis] / temp[:, np.newaxis, :]
            u = 1 / ratios.sum(axis=2)

        # Repair rows where a zero distance produced NaN memberships.
        coincident = temp == 0
        rows = coincident.any(axis=1)
        if rows.any():
            hits = coincident[rows]
            u[rows] = hits / hits.sum(axis=1, keepdims=True)
        return u

    def predict(self, X):
        """Return the index of the highest-membership cluster per sample.

        ``X`` may be a single sample (1-D) or a 2-D array of samples; the
        result is always a 1-D array of cluster indices.
        """
        X = np.asarray(X, dtype=float)
        if X.ndim == 1:
            X = X[np.newaxis, :]
        return np.argmax(self._predict(X), axis=-1)

# In [77]:
# Load the stock table. The first CSV column becomes the row index, which is
# used below as the company label (rows such as 'Amazon' and 'Yahoo').
df = pd.read_csv("stock-data.csv", index_col=0)
# Normalizer rescales each row (sample) independently.
df_scaled = Normalizer().fit_transform(df)

# --- Fuzzy C-Means: write the members of cluster 3 to output.csv ----------
fcm = FCM(n_clusters=4)
# NOTE: fit() returns the fitted FCM instance, not transformed data; the
# variable name is kept unchanged for anything that refers to it later.
Transformed_vector = fcm.fit(df_scaled)
fcm_labels = fcm.predict(df_scaled)
c_name = df.index
df_fcm = pd.DataFrame({'labels': fcm_labels, 'companies': c_name})
df_fcm = df_fcm.sort_values('labels')
df_fcm_3 = df_fcm[df_fcm['labels'] == 3]
df_fcm_3.to_csv("output.csv", index=False)

# --- Correlation between the raw Amazon and Yahoo rows -> output1.csv -----
corr_value = round(df.loc['Amazon'].corr(df.loc['Yahoo']), 4)
with open("output1.csv", mode="w") as f:
    f.write(str(corr_value))

# --- PCA to 40 components; write the resulting (rows, cols) -> output2.csv
pca = PCA(n_components=40, random_state=11)
Transformed_vector_pca = pca.fit_transform(df_scaled)
row_col = list(Transformed_vector_pca.shape)
# csv.writer needs the file opened with newline="" so that it controls the
# line endings itself (otherwise blank rows appear on some platforms).
with open("output2.csv", 'w', newline="") as f:
    csvw = csv.writer(f)
    csvw.writerow(row_col)

# --- Elbow scan: KMeans inertia (scaled by 1/100) for k = 2..14 -----------
num_of_clusters = range(2, 15)
error = []
for num_clusters in num_of_clusters:
    # Seeded so the error curve is reproducible from run to run.
    clusters = KMeans(n_clusters=num_clusters, random_state=0)
    clusters.fit(Transformed_vector_pca)
    error.append(clusters.inertia_ / 100)

df_clusters = pd.DataFrame({"Cluster_Numbers": num_of_clusters, "Error_Term": error})

# The cluster count is hard-coded rather than derived from df_clusters --
# presumably read off the elbow curve by hand; TODO confirm.
chosen_k = 11
with open("output3.csv", mode="w") as f:
    f.write(str(chosen_k))

# --- Final KMeans with the chosen k: members of cluster 4 -> output4.csv --
clusters1 = KMeans(n_clusters=chosen_k, random_state=0)
clusters1.fit(Transformed_vector_pca)
df_k = pd.DataFrame({'labels': clusters1.labels_, 'companies': c_name})
df_k = df_k.sort_values('labels', ascending=False)
df_k_4 = df_k[df_k['labels'] == 4]
df_k_4.to_csv("output4.csv", index=False)