In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
import autokeras as ak
from sklearn.model_selection import train_test_split
import os
import matplotlib.pyplot as plt
from sklearn.feature_selection import mutual_info_regression



In [2]:
from pebble import ProcessPool
from concurrent.futures import TimeoutError
from multiprocessing import freeze_support
import sys

import traceback
import timeit
import time
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn import metrics
from sklearn.preprocessing import StandardScaler

from sklearn.pipeline import make_pipeline
from sklearn.ensemble import AdaBoostClassifier, BaggingClassifier, ExtraTreesClassifier, GradientBoostingClassifier, RandomForestClassifier, HistGradientBoostingClassifier
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.linear_model import PassiveAggressiveClassifier, RidgeClassifier, SGDClassifier
from sklearn.naive_bayes import BernoulliNB, CategoricalNB, ComplementNB, GaussianNB, MultinomialNB
from sklearn.neighbors import KNeighborsClassifier, RadiusNeighborsClassifier, NearestCentroid
from sklearn.neural_network import MLPClassifier
from sklearn.semi_supervised import LabelPropagation
from sklearn.svm import LinearSVC, NuSVC, SVC
from sklearn.tree import DecisionTreeClassifier, ExtraTreeClassifier

from sklearn.ensemble import AdaBoostRegressor, BaggingRegressor, ExtraTreesRegressor, GradientBoostingRegressor, RandomForestRegressor, HistGradientBoostingRegressor
from sklearn.svm import SVR, LinearSVR, NuSVR
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.linear_model import LassoLarsIC, GammaRegressor, TweedieRegressor, BayesianRidge, ARDRegression,  LinearRegression, Ridge, RidgeCV, SGDRegressor, ElasticNet, HuberRegressor, QuantileRegressor, RANSACRegressor, TheilSenRegressor, PoissonRegressor, PassiveAggressiveRegressor, OrthogonalMatchingPursuit
from sklearn.neighbors import KNeighborsRegressor, RadiusNeighborsRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.tree import DecisionTreeRegressor, ExtraTreeRegressor
from sklearn.kernel_ridge import KernelRidge
from sklearn.cross_decomposition import PLSRegression, PLSCanonical

def format_time(seconds):
    """Render a duration given in seconds as a zero-padded ``HH:MM:SS`` string.

    The value is broken down with ``time.gmtime``, so the hour field wraps
    around once the duration reaches 24 hours.
    """
    broken_down = time.gmtime(seconds)
    return time.strftime('%H:%M:%S', broken_down)

def initializer(limit):
    """Cap the address space of the calling process.

    Passed to ``ProcessPool`` as the worker initializer so that each worker
    lowers its own soft ``RLIMIT_AS`` while keeping the existing hard limit.

    Args:
        limit: New soft ``RLIMIT_AS`` value, in bytes.
    """
    # Imported locally: `resource` is Unix-only and is otherwise only imported
    # inside the ``__main__`` guard, so it may not be a module global here.
    import resource

    _, hard = resource.getrlimit(resource.RLIMIT_AS)
    resource.setrlimit(resource.RLIMIT_AS, (limit, hard))

# Registry of classifiers to benchmark: display name -> unfitted estimator.
# test_classifier() looks an entry up by its key, and test_classifiers()
# schedules one job per key in this insertion order.
classifiers = {
    # Ensemble methods
    "AdaBoost": AdaBoostClassifier(n_estimators=100, random_state=0),
    "Bagging": BaggingClassifier(n_estimators=10, random_state=0),
    "ExtraTrees (Gini)": ExtraTreesClassifier(criterion="gini", n_estimators=100, random_state=0),
    "ExtraTrees (Entropy)": ExtraTreesClassifier(criterion="entropy", n_estimators=100, random_state=0),
    "GradientBoosting": GradientBoostingClassifier(n_estimators=100, learning_rate=1.0, max_depth=1, random_state=0),
    "RandomForest": RandomForestClassifier(max_depth=2, random_state=0),
    "HistGradientBoosting": HistGradientBoostingClassifier(),
    # Gaussian process
    "GaussianProcess": GaussianProcessClassifier(random_state=0),
    # Linear models (the SGD classifier is wrapped in a StandardScaler pipeline)
    "PassiveAggressive": PassiveAggressiveClassifier(max_iter=1000, random_state=0, tol=1e-3),
    "Ridge": RidgeClassifier(),
    "SGDClassifier": make_pipeline(StandardScaler(), SGDClassifier(max_iter=1000, tol=1e-3)),
    # Naive Bayes variants
    "BernoulliNaiveBayes": BernoulliNB(),
    "CategoricalNaiveBayes": CategoricalNB(),
    "ComplementNaiveBayes": ComplementNB(),
    "GaussianNaiveBayes": GaussianNB(),
    "MultinomialNaiveBayes": MultinomialNB(),
    # Neighbour-based models
    "KNeighbors": KNeighborsClassifier(n_neighbors=3),
    "RadiusNeighbors": RadiusNeighborsClassifier(radius=1.0),
    "NearestCentroid": NearestCentroid(),
    # Neural network and semi-supervised
    "MLP": MLPClassifier(random_state=1, max_iter=300),
    "LabelPropagation": LabelPropagation(),
    # Support vector machines (each wrapped in a StandardScaler pipeline)
    "LinearSVC": make_pipeline(StandardScaler(), LinearSVC(random_state=0, tol=1e-5)),
    "NuSVC": make_pipeline(StandardScaler(), NuSVC()),
    "SVC": make_pipeline(StandardScaler(), SVC(gamma='auto')),
    # Single trees
    "DecisionTree": DecisionTreeClassifier(random_state=0),
    "ExtraTree": ExtraTreeClassifier(random_state=0)
}

def test_classifier(classifier_type, X_train, X_test, y_train, y_test): 
    try:
        start_time = timeit.default_timer()
        print("\nClassifier", classifier_type, "...")
        clf = classifiers[classifier_type]
        clf.fit(X_train, y_train)
        y_predicted = clf.predict(X_test)
        accuracy = metrics.accuracy_score(y_test, y_predicted)
        print("\nClassifier", classifier_type, "accuracy is", accuracy)
        stop_time = timeit.default_timer()
        print("Classifier", classifier_type, "completed in", format_time(stop_time-start_time))
        return [classifier_type, accuracy]
    except Exception: 
        print("\nError for Classifier", classifier_type)
        traceback.print_exc()

def test_classifiers(X_train, X_test, y_train, y_test):
    """Benchmark every entry of ``classifiers`` in a pool of worker processes.

    One ``test_classifier`` job is scheduled per registry key, each with a
    ``TIMEOUT_SECONDS`` budget. Results are printed ranked by accuracy
    (highest first), followed by the set of classifiers that produced no
    result.

    Relies on the module globals ``POOL``, ``TIMEOUT_SECONDS``, ``LINUX`` and
    (when ``LINUX`` is true) ``MAX_MEMORY`` set in the ``__main__`` block.

    Args:
        X_train, y_train: Data each estimator is fitted on.
        X_test, y_test: Data each estimator is scored on.
    """
    # The memory-limiting initializer is only installed when LINUX is set;
    # otherwise the pool is created with the worker count alone.
    pool_options = {"max_workers": POOL}
    if LINUX:
        pool_options.update(initializer=initializer, initargs=(MAX_MEMORY,))

    accuracy = {}
    with ProcessPool(**pool_options) as pool:
        scheduled = [
            (key, pool.schedule(test_classifier,
                                args=(key, X_train, X_test, y_train, y_test),
                                timeout=TIMEOUT_SECONDS))
            for key in classifiers
        ]
        for key, future in scheduled:
            try:
                outcome = future.result()
            except TimeoutError:
                print("\nClassifier", key, "exceeded the time limit.")
            except MemoryError:
                print("\nClassifier", key, "exceeded the memory limit.")
            except Exception as exc:
                # Any other failure surfaced by the future (for example a
                # worker that died) must not abort the remaining jobs.
                print("\nClassifier", key, "failed:", repr(exc))
            else:
                # test_classifier returns None when the estimator itself failed.
                if outcome is not None:
                    name, score = outcome
                    accuracy[name] = score

    ranked = sorted(accuracy.items(), key=lambda item: item[1], reverse=True)

    print("Results: \n")
    untested = set(classifiers)
    for rank, (name, score) in enumerate(ranked, start=1):
        print(rank, name, score)
        untested.discard(name)
    print("\nUntested Classifiers:", untested)

# Registry of regressors to benchmark: display name -> unfitted estimator.
# test_regressor() looks an entry up by its key, and test_regressors()
# schedules one job per key in this insertion order.
regressors = {
    # Ensemble methods
    "AdaBoost (square)" : AdaBoostRegressor(random_state=0, n_estimators=100, loss="square"),
    "AdaBoost (linear)" : AdaBoostRegressor(random_state=0, n_estimators=100, loss="linear"),
    "AdaBoost (exponential)" : AdaBoostRegressor(random_state=0, n_estimators=100, loss="exponential"),
    "Bagging" : BaggingRegressor(n_estimators=10, random_state=0),
    "Bagging (svr)": BaggingRegressor(base_estimator=SVR(), n_estimators=10, random_state=0),
    "ExtraTrees (abs err)" : ExtraTreesRegressor(criterion = "absolute_error", n_estimators=100, random_state=0),
    "ExtraTrees (sq err)" : ExtraTreesRegressor(criterion = "squared_error", n_estimators=100, random_state=0),
    "GradientBoosting (huber)" : GradientBoostingRegressor(random_state=0,loss="huber"),
    "GradientBoosting (sq err)" : GradientBoostingRegressor(random_state=0,loss="squared_error"),
    "GradientBoosting (abs err)" : GradientBoostingRegressor(random_state=0,loss="absolute_error"),
    "Random Forest (sq err)" : RandomForestRegressor(max_depth=2, random_state=0,criterion="squared_error"),
    "Random Forest (abs err)" : RandomForestRegressor(max_depth=2, random_state=0,criterion="absolute_error"),
    "Random Forest (poisson)" : RandomForestRegressor(max_depth=2, random_state=0,criterion="poisson"),
    "HistGradientBoosting (sq err)" : HistGradientBoostingRegressor(loss="squared_error"),
    "HistGradientBoosting (abs err)" : HistGradientBoostingRegressor(loss="absolute_error"),
    "HistGradientBoosting (poisson)" : HistGradientBoostingRegressor(loss="poisson"),
    # Gaussian process
    "GaussianProcess" : GaussianProcessRegressor(random_state=0),
    # Linear models (the SGD variants are wrapped in a StandardScaler pipeline)
    "Linear" : LinearRegression(),
    "Ridge (Linear)" : Ridge(),
    "RidgeCV" : RidgeCV(),
    "SGDRegressor (elasticnet)" : make_pipeline(StandardScaler(),SGDRegressor(max_iter=1000, tol=1e-3,penalty="elasticnet")),
    "SGDRegressor (l2)" : make_pipeline(StandardScaler(),SGDRegressor(max_iter=1000, tol=1e-3,penalty="l2")),
    "SGDRegressor (l1)" : make_pipeline(StandardScaler(),SGDRegressor(max_iter=1000, tol=1e-3,penalty="l1")),
    "Elastic Net (random)" : ElasticNet(random_state=0,selection="random"),
    "Elastic Net (cyclic)" : ElasticNet(random_state=0,selection="cyclic"),
    "ARD" : ARDRegression(),
    "BayesianRidge" : BayesianRidge(),
    "Huber" : HuberRegressor(),
    "Quantile (highs-ds)" : QuantileRegressor(quantile=0.8, solver="highs-ds"),
    "Quantile (highs-ipm)" : QuantileRegressor(quantile=0.8, solver="highs-ipm"),
    "Quantile (highs)" : QuantileRegressor(quantile=0.8, solver="highs"),
    "Quantile (interior-point)" : QuantileRegressor(quantile=0.8, solver="interior-point"),
    "Quantile (revised simplex)" : QuantileRegressor(quantile=0.8, solver="revised simplex"),
    "RANSAC": RANSACRegressor(random_state=0),
    "TheilSenRegressor" : TheilSenRegressor(random_state=0),
    "PoissonRegressor" : PoissonRegressor(),
    "TweedieRegressor (auto)" : TweedieRegressor(link="auto"),
    "TweedieRegressor (identity)" : TweedieRegressor(link="identity"),
    "TweedieRegressor (log)" : TweedieRegressor(link="log"),
    "GammaRegressor" : GammaRegressor(),
    "PassiveAggressiveRegressor (epsilon_insensitive)" :  PassiveAggressiveRegressor(max_iter=100, random_state=0, tol=1e-3, loss="epsilon_insensitive"),
    "PassiveAggressiveRegressor (squared_epsilon_insensitive)" :  PassiveAggressiveRegressor(max_iter=100, random_state=0, tol=1e-3, loss="squared_epsilon_insensitive"),
    # Neighbour-based models
    "KNeighbors" : KNeighborsRegressor(n_neighbors=3),
    "Radius Neighbors" : RadiusNeighborsRegressor(radius=1.0),
    # Neural network
    "MLP" : MLPRegressor(random_state=1, max_iter=500),
    # Single trees
    "DecisionTree" : DecisionTreeRegressor(random_state=0),
    "Extra Tree" : ExtraTreeRegressor(random_state=0),
    # Kernel methods (the SVR variants are wrapped in a StandardScaler pipeline)
    "Kernel Ridge" : KernelRidge(alpha=1.0),
    "Linear SVR (epsilon_insensitive)" : make_pipeline(StandardScaler(), LinearSVR(random_state=0, tol=1e-5, loss="epsilon_insensitive")),
    "Linear SVR (squared_epsilon_insensitive)" : make_pipeline(StandardScaler(), LinearSVR(random_state=0, tol=1e-5, loss="squared_epsilon_insensitive")),
    "nuSVR" : make_pipeline(StandardScaler(), NuSVR(C=1.0, nu=0.1)),
    "SVR" : make_pipeline(StandardScaler(), SVR(C=1.0, epsilon=0.2)),
    # Least-angle / matching pursuit and cross decomposition
    "LassoLarsIC (bic)" : LassoLarsIC(criterion='bic', normalize=False),
    "LassoLarsIC (aic)" : LassoLarsIC(criterion='aic', normalize=False),
    "PLS" : PLSRegression(n_components=2),
    "OrthogonalMatchingPursuit" : OrthogonalMatchingPursuit(),
    "PLSCanonical" : PLSCanonical(n_components=2)
}

def test_regressor(regressor_type, X_train, X_test, y_train, y_test): 
    try:
        start_time = timeit.default_timer()
        print("\nRegressor", regressor_type, "...")
        reg = regressors[regressor_type]
        reg.fit(X_train, y_train)
        y_predicted = reg.predict(X_test)
        accuracy = metrics.mean_absolute_error(y_test, y_predicted)
        print("\nRegressor", regressor_type, "accuracy is", accuracy)
        stop_time = timeit.default_timer()
        print("Regressor", regressor_type, "completed in", format_time(stop_time-start_time))
        return [regressor_type, accuracy]
    except Exception: 
        print("\nError for Regressor", regressor_type)
        traceback.print_exc()

def test_regressors(X_train, X_test, y_train, y_test,):
    """Benchmark every entry of ``regressors`` in a pool of worker processes.

    One ``test_regressor`` job is scheduled per registry key, each with a
    ``TIMEOUT_SECONDS`` budget. Results are printed ranked by mean absolute
    error (lowest first), followed by the set of regressors that produced no
    result.

    Relies on the module globals ``POOL``, ``TIMEOUT_SECONDS``, ``LINUX`` and
    (when ``LINUX`` is true) ``MAX_MEMORY`` set in the ``__main__`` block.

    Args:
        X_train, y_train: Data each estimator is fitted on.
        X_test, y_test: Data each estimator is scored on.
    """
    # The memory-limiting initializer is only installed when LINUX is set;
    # otherwise the pool is created with the worker count alone.
    pool_options = {"max_workers": POOL}
    if LINUX:
        pool_options.update(initializer=initializer, initargs=(MAX_MEMORY,))

    mae = {}
    with ProcessPool(**pool_options) as pool:
        scheduled = [
            (key, pool.schedule(test_regressor,
                                args=(key, X_train, X_test, y_train, y_test),
                                timeout=TIMEOUT_SECONDS))
            for key in regressors
        ]
        for key, future in scheduled:
            try:
                outcome = future.result()
            except TimeoutError:
                print("\nRegressor", key, "exceeded the time limit.")
            except MemoryError:
                print("\nRegressor", key, "exceeded the memory limit.")
            except Exception as exc:
                # Any other failure surfaced by the future (for example a
                # worker that died) must not abort the remaining jobs.
                print("\nRegressor", key, "failed:", repr(exc))
            else:
                # test_regressor returns None when the estimator itself failed.
                if outcome is not None:
                    name, error = outcome
                    mae[name] = error

    ranked = sorted(mae.items(), key=lambda item: item[1])

    print("Results: \n")
    untested = set(regressors)
    for rank, (name, error) in enumerate(ranked, start=1):
        print(rank, name, error)
        untested.discard(name)
    print("\nUntested Regressors:", untested)

if __name__ == "__main__":
    # Harness configuration, read as module globals by test_classifiers()
    # and test_regressors().
    TIMEOUT_SECONDS = 60 * 10  # per-estimator time budget: 10 minutes
    POOL = 4                   # number of worker processes
    # Soft RLIMIT_AS handed to initializer(). The limit is expressed in bytes,
    # so the previous bare 1572864 capped each worker at 1.5 MiB; scale it to
    # 1.5 GiB.
    # NOTE(review): raise this if workers hit MemoryError straight away.
    MAX_MEMORY = 1572864 * 1024
    SILENT = False

    # startswith() rather than a substring test: "darwin" (macOS) contains
    # "win" and would otherwise be treated as Windows.
    if sys.platform.startswith("win"):
        print("Windows Detected")
        freeze_support()
        LINUX = False
    else:
        print("Linux Assumed:")
        import resource
        LINUX = True
    if SILENT:
        import warnings
        warnings.filterwarnings("ignore")

Linux Assumed:


In [3]:
# Show the contents of the current working directory.
os.listdir()

['final.csv',
 'structured_data_regressor',
 'submission.csv',
 'train.csv',
 'test.csv',
 '.ipynb_checkpoints',
 'sigan.ipynb']

In [4]:
# Load the training data. The path is relative to the notebook's working
# directory (where the listing above shows train.csv) rather than an absolute
# path that only exists on one machine.
trainDf = pd.read_csv('train.csv')
print(trainDf)

     index   feature 0  feature 1  feature 2  feature 3  feature 4  feature 5  \
0        0  2014-11-23        2.6        2.4        2.6        2.6       27.0   
1        1  2014-01-24        0.0        0.0        0.0        0.0       25.2   
2        2  2017-07-01        0.0        0.0        0.0        0.0       29.3   
3        3  2015-09-29        0.0        0.0        0.0        0.0       28.2   
4        4  2016-11-06        1.0        0.8        1.0        1.0       28.1   
..     ...         ...        ...        ...        ...        ...        ...   
895    895  2014-03-12        0.0        0.0        0.0        0.0       27.7   
896    896  2016-02-06        2.6        1.2        1.2        1.2       28.0   
897    897  2014-05-19       29.4       18.8       27.6       28.6       27.1   
898    898  2017-04-07        3.6        1.2        1.8        2.8       27.7   
899    899  2015-01-17        0.0        0.0        0.0        0.0       26.5   

     feature 6  feature 7  

In [5]:
# Separate the regression target ("predict") from the remaining columns.
# NOTE(review): the "index" column stays in X and is therefore used as a
# feature - confirm that is intended.
X = trainDf.drop(columns="predict")
y = trainDf["predict"]
print(X)
print(y)

     index   feature 0  feature 1  feature 2  feature 3  feature 4  feature 5  \
0        0  2014-11-23        2.6        2.4        2.6        2.6       27.0   
1        1  2014-01-24        0.0        0.0        0.0        0.0       25.2   
2        2  2017-07-01        0.0        0.0        0.0        0.0       29.3   
3        3  2015-09-29        0.0        0.0        0.0        0.0       28.2   
4        4  2016-11-06        1.0        0.8        1.0        1.0       28.1   
..     ...         ...        ...        ...        ...        ...        ...   
895    895  2014-03-12        0.0        0.0        0.0        0.0       27.7   
896    896  2016-02-06        2.6        1.2        1.2        1.2       28.0   
897    897  2014-05-19       29.4       18.8       27.6       28.6       27.1   
898    898  2017-04-07        3.6        1.2        1.8        2.8       27.7   
899    899  2015-01-17        0.0        0.0        0.0        0.0       26.5   

     feature 6  feature 7  

In [6]:
# Feature engineering goes here (must run before the train/test split and model fitting).

In [7]:
# Split the "feature 0" date (YYYY-MM-DD) into integer year/month/day columns.
# The dates are read from trainDf rather than X so the cell can be re-run
# after "feature 0" has already been dropped from X.
train_dates = pd.to_datetime(trainDf["feature 0"], format="%Y-%m-%d")
X = X.drop(columns="feature 0", errors="ignore").assign(
    year=train_dates.dt.year,
    month=train_dates.dt.month,
    day=train_dates.dt.day,
)
# Disabled experiment: X["feature 1"] *= 10


'\nX["feature 1"] *= 10\n'

In [8]:
# Disabled experiment (mutual-information feature scoring), kept as a bare
# string literal so that it is not executed.
# NOTE(review): the snippet is incomplete (`features =` has no value) and
# would not run if re-enabled as-is.
'''
features = 
def make_mi_scores(X, y, discrete_features):
    mi_scores = mutual_info_regression(X, y, discrete_features= ["year", "month"])
    mi_scores = pd.Series(mi_scores, name="MI Scores", index=X.columns)
    mi_scores = mi_scores.sort_values(ascending=False)
    return mi_scores

mi_scores = make_mi_scores(X, y, features)
mi_scores[::3]  # show a few features with their MI scores
'''

'\nfeatures = \ndef make_mi_scores(X, y, discrete_features):\n    mi_scores = mutual_info_regression(X, y, discrete_features= ["year", "month"])\n    mi_scores = pd.Series(mi_scores, name="MI Scores", index=X.columns)\n    mi_scores = mi_scores.sort_values(ascending=False)\n    return mi_scores\n\nmi_scores = make_mi_scores(X, y, features)\nmi_scores[::3]  # show a few features with their MI scores\n'

In [9]:
from sklearn.metrics import mean_absolute_error

# Hold out 33% of the rows for evaluation; the fixed seed keeps the split
# reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.33,
    random_state=42,
)

In [10]:
#test_regressors(X_train, X_test, y_train, y_test)

In [11]:
# Disabled experiment (QuantileRegressor baseline scored with MAE), kept as a
# bare string literal so that it is not executed.
'''
from sklearn.linear_model import QuantileRegressor

model1 = QuantileRegressor()
model1.fit(X_train, y_train)
y_pred = model1.predict(X_test)

mae = mean_absolute_error
print(mae(y_test, y_pred))
'''

'\nfrom sklearn.linear_model import QuantileRegressor\n\nmodel1 = QuantileRegressor()\nmodel1.fit(X_train, y_train)\ny_pred = model1.predict(X_test)\n\nmae = mean_absolute_error\nprint(mae(y_test, y_pred))\n'

In [36]:
from sklearn.ensemble import ExtraTreesRegressor

# Candidate model: ExtraTrees with a fixed seed, scored by mean absolute
# error on the held-out split.
mae = mean_absolute_error
model2 = ExtraTreesRegressor(random_state=42)
y_pred2 = model2.fit(X_train, y_train).predict(X_test)
print(mae(y_test, y_pred2))

1.41129058130228


In [37]:
from sklearn.ensemble import GradientBoostingRegressor

# Candidate model: gradient boosting with a fixed seed, scored by mean
# absolute error on the held-out split.
mae = mean_absolute_error
model3 = GradientBoostingRegressor(random_state=42)
y_pred3 = model3.fit(X_train, y_train).predict(X_test)
print(mae(y_test, y_pred3))

1.003553865575501


In [14]:
# Disabled experiment (AutoKeras StructuredDataRegressor search), kept as a
# bare string literal so that it is not executed.
'''
reg = ak.StructuredDataRegressor(
    overwrite=True,
    max_trials=100,
)

reg.fit(
    x=X, #replace with wtv features u want
    y=y, #label
    validation_split=0.15, #validation split
    epochs=100, #experiment with this, too much may cause overfitting
)
'''

'\nreg = ak.StructuredDataRegressor(\n    overwrite=True,\n    max_trials=100,\n)\n\nreg.fit(\n    x=X, #replace with wtv features u want\n    y=y, #label\n    validation_split=0.15, #validation split\n    epochs=100, #experiment with this, too much may cause overfitting\n)\n'

In [15]:
# Load the test data. The path is relative to the notebook's working
# directory (where the earlier listing shows test.csv) rather than an absolute
# path that only exists on one machine.
testDf = pd.read_csv('test.csv')
testDf

Unnamed: 0,index,feature 0,feature 1,feature 2,feature 3,feature 4,feature 5,feature 6,feature 7,feature 9
0,900,2017-09-03,0.0,0.0,0.0,0.0,29.2,32.1,27.4,36.7
1,901,2014-11-04,0.0,0.0,0.0,0.0,29.7,33.8,26.5,29.2
2,902,2015-02-04,7.0,5.2,6.4,7.0,25.1,27.1,23.1,34.2
3,903,2015-10-27,0.0,0.0,0.0,0.0,28.4,33.1,25.6,28.4
4,904,2014-01-17,0.0,0.0,0.0,0.0,25.6,28.6,23.5,41.8
...,...,...,...,...,...,...,...,...,...,...
164,1064,2014-01-01,0.0,0.0,0.0,0.0,26.3,28.8,24.3,34.2
165,1065,2017-09-08,37.4,21.8,29.8,37.4,27.2,32.4,23.5,49.7
166,1066,2014-05-09,21.8,17.2,21.4,21.6,28.1,32.2,25.8,37.8
167,1067,2015-06-14,10.4,6.8,8.2,10.2,26.3,28.1,24.2,42.8


In [16]:
# Split the "feature 0" date (YYYY-MM-DD) into integer year/month/day columns.
# The guard makes the cell safe to re-run once "feature 0" has been replaced.
if "feature 0" in testDf.columns:
    test_dates = pd.to_datetime(testDf["feature 0"], format="%Y-%m-%d")
    testDf = testDf.drop(columns="feature 0").assign(
        year=test_dates.dt.year,
        month=test_dates.dt.month,
        day=test_dates.dt.day,
    )
print(testDf)

     index  feature 1  feature 2  feature 3  feature 4  feature 5  feature 6  \
0      900        0.0        0.0        0.0        0.0       29.2       32.1   
1      901        0.0        0.0        0.0        0.0       29.7       33.8   
2      902        7.0        5.2        6.4        7.0       25.1       27.1   
3      903        0.0        0.0        0.0        0.0       28.4       33.1   
4      904        0.0        0.0        0.0        0.0       25.6       28.6   
..     ...        ...        ...        ...        ...        ...        ...   
164   1064        0.0        0.0        0.0        0.0       26.3       28.8   
165   1065       37.4       21.8       29.8       37.4       27.2       32.4   
166   1066       21.8       17.2       21.4       21.6       28.1       32.2   
167   1067       10.4        6.8        8.2       10.2       26.3       28.1   
168   1068        4.4        3.4        3.6        3.6       28.3       31.4   

     feature 7  feature 9  year month d

In [38]:
# Predict with the gradient-boosting model. Selecting X_train.columns gives the
# model exactly the columns it was fitted on, in the same order, and raises a
# KeyError if the test frame is missing one of them.
y_final = model3.predict(testDf[X_train.columns])

In [39]:
#y_predictions = reg.predict(testDf[['feature 5', 'feature 6', 'feature 7', 'feature 9']])
#change features to match earlier

In [40]:
# Print the test-set predictions produced by model3.
print(y_final)

[ 9.01629954  8.04020216  9.28543027  7.13420862 13.54772392 10.60152818
  9.20555659  8.3158969   7.52029158  6.95039162  7.90185249  6.72481779
  7.91073562 13.77847719  6.78043496 12.27733252  8.49587799  6.63096845
  8.15050226 10.87954649  5.99867146  6.83312995  7.15260478 10.63439139
  7.56026535 10.37568295  7.60930778  7.62181955 10.77370751  6.84694854
  6.58402295  7.25710801  5.85966176  9.48176943  8.20493524  6.84158912
  7.01416719 14.11755812  6.56235941  8.5284506   8.54496102  7.13187566
  7.13640665  9.86537595  8.52398267  7.17304326  7.26979356  8.00779038
  6.81401047 14.55602311  6.93268769  7.53531062  6.44340895  7.47208887
  7.71595631  7.65851493  5.99407021 13.46541984  6.39410957  8.19588364
  8.03712582 12.84117099  6.17867326  8.25710197  8.94855536 13.71404796
 10.29409461  6.56140199  7.78748671  6.87958157  6.58862969 12.4773502
 10.35402897  6.76775393  7.30774609  7.99598644  7.64085147  9.01843595
  6.69790143  6.1427181   6.45461098  7.29994143  8.

In [41]:
# Load the submission template. The path is relative to the notebook's working
# directory (where the earlier listing shows submission.csv) rather than an
# absolute path that only exists on one machine.
submissionDf = pd.read_csv('submission.csv')
submissionDf

Unnamed: 0,index,predict
0,900,???
1,901,???
2,902,???
3,903,???
4,904,???
...,...,...
164,1064,???
165,1065,???
166,1066,???
167,1067,???


In [42]:
# Bracket assignment always targets the column. Attribute assignment only
# does so when the column already exists; otherwise it sets a plain Python
# attribute instead.
submissionDf["predict"] = y_final

In [43]:
# Display the submission frame after the "predict" column has been filled in.
submissionDf

Unnamed: 0,index,predict
0,900,9.016300
1,901,8.040202
2,902,9.285430
3,903,7.134209
4,904,13.547724
...,...,...
164,1064,12.251721
165,1065,6.459482
166,1066,6.946973
167,1067,6.740551


In [44]:
# index=False keeps the DataFrame's row index out of the file, so final.csv
# contains only the columns of the submission frame.
submissionDf.to_csv('final.csv', index=False)