In [None]:
class Point:
    
    def __init__(self, a, b):
        self.a = a
        self.b = b
    
    def __repr__(self):
        return f"Point({self.a}, {self.b})"
    
    # def __copy__(self):
    #     return Point(self.a, self.b)


In [2]:
from copy import copy

p1 = Point(1, 2)
p2 = copy(p1)
p2.b = 3
print(p1)
print(p2)

Point(1, 2)
Point(1, 3)


In [7]:
import os
import time
from copy import deepcopy

import numpy as np
from scipy.stats import skew, kurtosis

from context import Context
from data import Data
from sklearn.svm import SVC
import util


def feature_extract(data):
    start = time.time()
    features = np.empty(shape=(len(data), 24, 5))
    features[:, :, 0] = np.median(data, axis=2)
    features[:, :, 1] = np.std(data, axis=2)
    features[:, :, 2] = skew(data, axis=2, bias=False, )
    features[:, :, 3] = kurtosis(data, axis=2, bias=False)
    features[:, :, 4] = deepcopy(data[:, :, -1])
    features = features.reshape((len(data), 24 * features.shape[2]))
    end = time.time()
    print(f"Extracted statistical features in {(end - start) * 1000:.1f} ms")
    return features


In [0]:
print(f"cpu count: {os.cpu_count()}")
Context.data_dir = "/home/foumani/data/solar_flare_prediction"
Context.files_df_filename = "all_files.csv"
Context.files_np_filename = "full_data_X_1_25.npy"
context = Context(train_nf_n=1500, train_fl_n=800, val_part=1, test_part=2)
data = Data()

In [88]:
start = time.time()
# train_nf_n=1500, train_fl_n=800, 
context = Context(train_nf_n=2500, train_fl_n=800, val_part=1, test_part=2)
# _, _, _, _
# X_val, y_val, X_test, y_test
numpy_datasets = data.numpy_datasets(context)
X_train, y_train, _, _, _, _ = numpy_datasets
def prepare(X, y):
    X = feature_extract(X)
    indices = ~np.isnan(X).any(axis=1)
    return X[indices], y[indices]
X_train, y_train = prepare(X_train, y_train)
# X_val, y_val = prepare(X_val, y_val)
# X_test, y_test = prepare(X_test, y_test)
svc = SVC()
svc.fit(X_train, y_train)
y_pred = svc.predict(X_val)

print(f"{(time.time() - start) * 1000:.1f} ms")

Training [3, 4, 5], Val 1, Test 2


100%|██████████| 3300/3300 [00:00<00:00, 19827.42files/s]


removing nans
preprocessed in 55.2 ms
prepared dataset in 416.7 ms
train:  2851 all,  2172 negative,   679 positive
prepared dataset in 335.6 ms
val  : 69189 all, 68009 negative,  1180 positive
prepared dataset in 386.6 ms
test : 79541 all, 78256 negative,  1285 positive
Extracted statistical features in 267.7 ms
3841.2 ms


In [89]:
import sklearn
tp = ((y_val == y_pred) & (y_val == 1)).sum()
fp = ((y_pred == 1) & (y_val != y_pred)).sum()
tn = ((y_val == y_pred) & (y_pred == 0)).sum()
fn = ((y_val != y_pred) & (y_pred == 0)).sum()
sklearn.metrics.confusion_matrix(y_val, y_pred)

array([[21795,  3440],
       [   73,   817]])

In [90]:
np.array([[tn, fp], [fn, tp]])

array([[21795,  3440],
       [   73,   817]])

In [91]:
metric = util.Metric(tp, fp, tn, fn)
print(metric.tss)

0.7816589229779399


In [16]:
np.isnan(feature_extract(X_train)).any(axis=1)

Extracted statistical features in 207.6 ms


array([False, False, False, ..., False, False, False])

In [2]:
import itertools
inputdata = [[400, 600, 800, 1000, 1250, 1500, 1750, 2000, 2400, 2800,
                           3200, 3800, 4400, 5400],
             [400, 600, 800, 1000, 1400]]
result = list(itertools.product(*inputdata))
print(len(result))

70


In [15]:
from sklearn.metrics import *
import numpy as np

y_true = [0, 1, 2, 0, 1, 2]
y_pred = [0, 2, 1, 0, 0, 1]

cnf_matrix = confusion_matrix(y_true, y_pred)
display(cnf_matrix)

fp = cnf_matrix.sum(axis=0) - np.diag(cnf_matrix)  
fn = cnf_matrix.sum(axis=1) - np.diag(cnf_matrix)
tp = np.diag(cnf_matrix)
tn = cnf_matrix.sum() - (fp + fn + tp)
print(f"fp: {fp}")
print(f"fn: {fn}")
print(f"tp: {TP}")
print(f"tn: {tn}")
tpr = tp / (tp + fn)
tnr = tn / (tn + fp)
tss = tpr + tnr - 1
print(f"tss: {tss}")

array([[2, 0, 0],
       [1, 0, 1],
       [0, 2, 0]])

fp: [1 2 1]
fn: [0 2 2]
tp: [2 0 0]
tn: [3 2 3]
tss: [ 0.75 -0.5  -0.25]


In [1]:
import pandas

In [2]:
df = pandas.read_csv("/home/foumani/data/solar_flare_prediction/all_files.csv")

In [7]:
df

Unnamed: 0,path,active_region,partition,label
0,partition5/NF/B9.1@10451:Secondary_ar5550_s201...,ar5550,partition5,B
1,partition5/NF/FQ_ar5758_s2015-07-12T05:00:00_e...,ar5758,partition5,Q
2,partition5/NF/FQ_ar7204_s2017-12-14T14:12:00_e...,ar7204,partition5,Q
3,partition5/NF/FQ_ar5484_s2015-04-19T17:48:00_e...,ar5484,partition5,Q
4,partition5/NF/FQ_ar7045_s2017-06-19T19:00:00_e...,ar7045,partition5,Q
...,...,...,...,...
331180,partition2/FL/M1.7@5382:Primary_ar2362_s2013-0...,ar2362,partition2,M
331181,partition2/FL/M1.3@6145:Primary_ar2790_s2013-0...,ar2790,partition2,M
331182,partition2/FL/M1.4@5955:Primary_ar2716_s2013-0...,ar2716,partition2,M
331183,partition2/FL/M1.9@5510:Primary_ar2491_s2013-0...,ar2491,partition2,M


In [12]:
df_m = df[(df.label == "M")]

In [16]:
df_m

Unnamed: 0,path,active_region,partition,label
74375,partition5/FL/M7.6@11132:Primary_ar5983_s2015-...,ar5983,partition5,M
74376,partition5/FL/M1.3@11160:Primary_ar5983_s2015-...,ar5983,partition5,M
74377,partition5/FL/M1.9@11410:Primary_ar6052_s2015-...,ar6052,partition5,M
74378,partition5/FL/M3.8@10625:Primary_ar5673_s2015-...,ar5673,partition5,M
74379,partition5/FL/M1.0@10620:Primary_ar5692_s2015-...,ar5692,partition5,M
...,...,...,...,...
331180,partition2/FL/M1.7@5382:Primary_ar2362_s2013-0...,ar2362,partition2,M
331181,partition2/FL/M1.3@6145:Primary_ar2790_s2013-0...,ar2790,partition2,M
331182,partition2/FL/M1.4@5955:Primary_ar2716_s2013-0...,ar2716,partition2,M
331183,partition2/FL/M1.9@5510:Primary_ar2491_s2013-0...,ar2491,partition2,M


In [13]:
df_m.active_region.unique()

array(['ar5983', 'ar6052', 'ar5673', 'ar5692', 'ar6972', 'ar6206',
       'ar5982', 'ar6078', 'ar5956', 'ar5745', 'ar5456', 'ar5885',
       'ar6063', 'ar5415', 'ar5447', 'ar7075', 'ar6327', 'ar7115',
       'ar5526', 'ar5446', 'ar5637', 'ar6015', 'ar5738', 'ar5298',
       'ar4941', 'ar4225', 'ar4781', 'ar4466', 'ar4874', 'ar5127',
       'ar4580', 'ar4396', 'ar5144', 'ar4817', 'ar4920', 'ar4955',
       'ar5107', 'ar4698', 'ar5026', 'ar4294', 'ar4536', 'ar5011',
       'ar4344', 'ar4478', 'ar4231', 'ar4639', 'ar4678', 'ar4530',
       'ar3497', 'ar3836', 'ar3535', 'ar4186', 'ar4071', 'ar3879',
       'ar4138', 'ar3688', 'ar3563', 'ar3376', 'ar3813', 'ar3580',
       'ar3686', 'ar3721', 'ar3999', 'ar4197', 'ar3321', 'ar4000',
       'ar3311', 'ar3364', 'ar3520', 'ar3941', 'ar3295', 'ar3437',
       'ar3344', 'ar4097', 'ar3730', 'ar3341', 'ar3804', 'ar3766',
       'ar3366', 'ar3894', 'ar3784', 'ar3877', 'ar3779', 'ar3740',
       'ar833', 'ar1321', 'ar1449', 'ar1066', 'ar384', 'ar892'

In [15]:
df[(df.label == "X")].active_region.unique()

array(['ar7115', 'ar4781', 'ar4698', 'ar4920', 'ar5298', 'ar4536',
       'ar3341', 'ar3563', 'ar3894', 'ar3364', 'ar4000', 'ar377', 'ar892',
       'ar753', 'ar1449', 'ar401', 'ar833', 'ar1807', 'ar1834', 'ar3291'],
      dtype=object)

In [20]:
pandas.read_csv("/home/foumani/data/solar_flare_prediction/partition1/FL/M1.0@265:Primary_ar115_s2010-08-06T06:36:00_e2010-08-06T18:24:00.csv", sep="\t")[1:25]

Unnamed: 0,Timestamp,TOTUSJH,TOTBSQ,TOTPOT,TOTUSJZ,ABSNJZH,SAVNCPP,USFLUX,TOTFZ,MEANPOT,...,BFLARE_LABEL_LOC,CFLARE_LOC,CFLARE_LABEL_LOC,MFLARE_LOC,MFLARE_LABEL_LOC,XFLARE_LOC,XFLARE_LABEL_LOC,XR_MAX,XR_QUAL,IS_TMFI
1,2010-08-06 06:48:00,987.271725,15550480000.0,2.252867e+23,19713090000000.0,150.347978,3570546000000.0,1.781535e+22,-7.201674e+24,5907.434832,...,,0.0,,0.0,,0.0,,2.6111e-07,12,True
2,2010-08-06 07:00:00,985.015683,15501090000.0,2.238914e+23,19625610000000.0,153.06935,4284650000000.0,1.775269e+22,-7.188077e+24,5909.535936,...,,0.0,,0.0,,0.0,,2.5341e-07,12,True
3,2010-08-06 07:12:00,993.944093,15546840000.0,2.233768e+23,19789070000000.0,154.994888,3122913000000.0,1.799766e+22,-7.273269e+24,5779.465192,...,,0.0,,0.0,,0.0,,2.2592e-07,12,True
4,2010-08-06 07:24:00,994.364606,15642300000.0,2.242762e+23,19862320000000.0,153.108332,3790034000000.0,1.826607e+22,-7.339826e+24,5692.225659,...,,0.0,,0.0,,0.0,,2.1111e-07,12,True
5,2010-08-06 07:36:00,984.920685,15680600000.0,2.245457e+23,19921600000000.0,158.285208,2165144000000.0,1.839902e+22,-7.382202e+24,5630.365466,...,,0.0,,0.0,,0.0,,1.9386e-07,12,True
6,2010-08-06 07:48:00,993.204165,15712490000.0,2.25652e+23,20270700000000.0,164.61902,2393348000000.0,1.851214e+22,-7.393427e+24,5582.44922,...,,0.0,,0.0,,0.0,,1.8709e-07,12,True
7,2010-08-06 08:00:00,1016.056161,15768820000.0,2.276326e+23,20855140000000.0,146.881132,2601881000000.0,1.860364e+22,-7.358696e+24,5591.033267,...,,0.0,,0.0,,0.0,,1.7785e-07,12,True
8,2010-08-06 08:12:00,1011.574889,15864670000.0,2.285766e+23,20708260000000.0,147.253891,2682310000000.0,1.87787e+22,-7.434874e+24,5535.138515,...,,0.0,,0.0,,0.0,,1.8138e-07,12,True
9,2010-08-06 08:24:00,1038.668646,15902620000.0,2.297426e+23,21581560000000.0,137.473923,1900770000000.0,1.886895e+22,-7.42725e+24,5501.450333,...,,0.0,,0.0,,0.0,,1.9775e-07,12,True
10,2010-08-06 08:36:00,1049.760693,15971220000.0,2.30606e+23,22063590000000.0,130.681215,3178608000000.0,1.894578e+22,-7.457033e+24,5491.910988,...,,0.0,,0.0,,0.0,,1.8846e-07,12,True


In [2]:
import torch
import torch.nn

conv1 = torch.nn.Conv1d(in_channels=24,
                        out_channels=40,
                        kernel_size=7,
                        bias=True)
conv1.weight.shape
conv1.bias.shape

torch.Size([40])