In [33]:
#!/usr/bin/env python3
import pandas as pd
import numpy as np

from sklearn.tree import DecisionTreeRegressor
from sklearn.neural_network import MLPRegressor


from scipy.stats.stats import pearsonr


In [41]:
def evaluate_prediction(predicted_vals, y_matr):
    """Print and return the per-class Pearson correlation of predictions vs. targets.

    Columns 0, 1 and 2 of both inputs are paired up and reported under the
    flare-class labels "C", "M" and "X" respectively; one summary line is
    printed per class.

    Parameters
    ----------
    predicted_vals : array-like
        2-D collection of predictions with at least three columns.
    y_matr : pandas.DataFrame or array-like
        2-D collection of reference values with the same column layout.

    Returns
    -------
    tuple
        ``(C_corr_p, M_corr_p, X_corr_p)``; each element is the value returned
        by ``pearsonr`` for that column, with the correlation coefficient at
        index 0 and the two-tailed p-value at index 1.
    """
    # Convert the arguments that were actually passed in. The earlier version
    # converted the notebook-global ``Y_test`` instead of ``y_matr`` (NameError
    # on a fresh kernel, wrong data for any other frame) and relied on
    # ``DataFrame.as_matrix()``, which no longer exists in current pandas.
    predicted = np.asarray(predicted_vals)
    actual = np.asarray(y_matr)

    flare_classes = ["C", "M", "X"]
    correlations = tuple(
        pearsonr(predicted[:, column], actual[:, column])
        for column in range(len(flare_classes))
    )

    for flare_class, corr_p in zip(flare_classes, correlations):
        print("Flare class:  {} | Pearson R = {:.3f} | two - tailed p = {:.2e} ".format(
            flare_class,
            corr_p[0],
            corr_p[1]))
    return correlations
    

In [12]:
# Read the space-separated flare records, skipping the first line of the file
# and treating no line as a header.
df = pd.read_csv(
    "../data/flare.data1",
    sep=" ",
    skiprows=1,
    header=None,
    # With header=None the columns are labelled by integer position, so the
    # dtype mapping must use int keys; the former string keys ("0", "1", "2")
    # matched no column and the category dtype was never applied.
    dtype={0: "category", 1: "category", 2: "category"},
)
df.columns = ["Class", "largest spot", "spot distr.", "Activity", "Evolution", "Prev 24h act",
              "Hist. complx.", "Did reg. bec. complx.", "Area", "Area of lrgst. spt.",
              "C-class - nxt 24h",
              "M-Class - nxt 24h",
              "X-class - nxt 24h"]

# One-hot encode the three categorical columns so every column is numeric.
df_numerical = pd.get_dummies(df, columns=["Class", "largest spot", "spot distr."])
df_numerical.head()

Unnamed: 0,Activity,Evolution,Prev 24h act,Hist. complx.,Did reg. bec. complx.,Area,Area of lrgst. spt.,C-class - nxt 24h,M-Class - nxt 24h,X-class - nxt 24h,...,largest spot_A,largest spot_H,largest spot_K,largest spot_R,largest spot_S,largest spot_X,spot distr._C,spot distr._I,spot distr._O,spot distr._X
0,1,2,1,1,2,1,2,0,0,0,...,0,0,0,0,1,0,0,0,1,0
1,1,3,1,1,2,1,2,0,0,0,...,0,0,0,0,1,0,0,0,1,0
2,1,3,1,1,2,1,1,0,0,0,...,0,0,0,0,1,0,0,0,1,0
3,1,3,1,1,2,1,2,0,0,0,...,0,0,0,0,1,0,0,0,1,0
4,1,3,1,1,2,1,2,0,0,0,...,1,0,0,0,0,0,0,0,1,0


In [14]:
# Number of rows in the loaded frame.
len(df)


323

In [43]:
# Positional hold-out split: the first 280 rows are used for training and
# every remaining row is kept for testing.
n_train_rows = 280
train = df_numerical.iloc[:n_train_rows]
test = df_numerical.iloc[n_train_rows:]


Flare class:  C | Pearson R = 0.324 | two - tailed p = 3.40e-02 
Flare class:  M | Pearson R = 0.256 | two - tailed p = 9.77e-02 
Flare class:  X | Pearson R = -0.065 | two - tailed p = 6.77e-01 


In [45]:
# The three columns the model has to predict.
target_columns = ["C-class - nxt 24h",
                  "M-Class - nxt 24h",
                  "X-class - nxt 24h"]

# Targets: only the three target columns of each split.
Y_train = train.filter(items=target_columns, axis=1)
Y_test = test.filter(items=target_columns, axis=1)

# Features: every remaining column of each split.
X_train = train.drop(columns=target_columns)
X_test = test.drop(columns=target_columns)

# Fit a multi-output regression tree with a fixed seed, then predict the
# held-out rows.
tree = DecisionTreeRegressor(random_state=0)
tree.fit(X_train, Y_train)
predicted_vals = tree.predict(X_test)

# Prints one correlation line per flare class and keeps the returned results.
a = evaluate_prediction(predicted_vals, Y_test)
    

Flare class:  C | Pearson R = 0.324 | two - tailed p = 3.40e-02 
Flare class:  M | Pearson R = 0.256 | two - tailed p = 9.77e-02 
Flare class:  X | Pearson R = -0.065 | two - tailed p = 6.77e-01 
