In [1]:
from feature import *
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.preprocessing import LabelEncoder
import torch.nn.functional as F


In [9]:
def extract_features(row):
    """Compute the summary features of one sample row.

    The row is read positionally: ``row.iloc[1]`` is forwarded to
    ``extract_frequency_features`` as ``fs`` and ``row.iloc[4:-1]`` is taken as
    the flux-density waveform (assumes the flux-density columns start at the
    5th column and that the last column is not part of the waveform).

    Parameters
    ----------
    row : pandas.Series
        One row of the raw data frame, e.g. as handed over by
        ``DataFrame.apply(extract_features, axis=1)``.

    Returns
    -------
    pandas.Series
        Statistics of the waveform (mean/std/max/min/range), the values
        returned by ``calculate_symmetry``, ``extract_frequency_features`` and
        ``calculate_duty_cycle``, and statistics of ``calculate_slope``'s
        result, keyed by feature name.

    Raises
    ------
    ValueError
        If a waveform entry cannot be converted to ``float``.
    """
    # A row taken from a frame that mixes text and numeric columns is an
    # object-dtype Series; convert the waveform to float once so the statistics
    # and the helper functions all work on a proper numeric array.
    flux_density = row.iloc[4:-1].astype(float)
    flux_values = flux_density.values
    fs = row.iloc[1]

    skewness, symmetric_messure = calculate_symmetry(flux_values)
    max_freq, mean_mag, std_mag, energy = extract_frequency_features(flux_values, fs)
    slope = calculate_slope(flux_values)

    # Extremes are needed twice (on their own and for the range).
    flux_max = np.max(flux_density)
    flux_min = np.min(flux_density)

    return pd.Series({
        'mean_flux': np.mean(flux_density),
        'std_flux': np.std(flux_density),
        'max_flux': flux_max,
        'min_flux': flux_min,
        'range_flux': flux_max - flux_min,
        'skewness': skewness,
        'symmetric_messure': symmetric_messure,
        'max_freq': max_freq,
        'mean_mag': mean_mag,
        'std_mag': std_mag,
        'energy': energy,
        'slope_mean': np.mean(slope),
        'slope_std': np.std(slope),
        'slope_max': np.max(slope),
        'slope_min': np.min(slope),
        'slope_abs_mean': np.mean(np.abs(slope)),
        'duty_cicle': calculate_duty_cycle(flux_values),
    })

# features = merged_data.apply(extract_features, axis=1)

In [5]:
# Load the raw training table and preview its first two rows.
df = pd.read_csv('all_train.csv')
df.head(2)

Unnamed: 0,temperature,frequency,core_loss,waveform,0,1,2,3,4,5,...,1015,1016,1017,1018,1019,1020,1021,1022,1023,material
0,25,50030,1998.0,正弦波,0.000213,0.000389,0.000566,0.000743,0.000919,0.001096,...,-0.001374,-0.001198,-0.001022,-0.000846,-0.000669,-0.000492,-0.000316,-0.00014,3.6e-05,材料1
1,25,50020,2428.0,正弦波,-0.000551,-0.000358,-0.000165,2.8e-05,0.000221,0.000413,...,-0.002284,-0.002091,-0.001899,-0.001707,-0.001515,-0.001322,-0.001129,-0.000937,-0.000744,材料1


In [8]:
# Integer-encode the material name of every sample.
# NOTE: despite its name, `waveform` holds the values of the 'material' column.
waveform = df['material'].values
encoder = LabelEncoder()
label = encoder.fit_transform(waveform)
len(label)  # number of encoded samples

12400

In [10]:
# Build one feature row per sample by running extract_features on every row of df.
df_features = df.apply(extract_features, axis='columns')

In [16]:
# Spot-check two feature rows by position.
df_features.iloc[5333:5335]

Unnamed: 0,mean_flux,std_flux,max_flux,min_flux,range_flux,skewness,symmetric_messure,max_freq,mean_mag,std_mag,energy,slope_mean,slope_std,slope_max,slope_min,slope_abs_mean,duty_cicle,material
5333,9.765865e-13,0.089133,0.154164,-0.152546,0.306711,0.008384,0.093111,122.939453,0.18767,2.846081,4165.322344,-1e-06,0.000654,0.001045,-0.000473,0.000598,49.902344,1
5334,5.859312e-12,0.142248,0.245781,-0.243423,0.489204,0.007556,0.148513,122.939453,0.299336,4.542082,10608.697901,-2e-06,0.001044,0.001668,-0.000748,0.000954,49.902344,1


In [12]:

# Attach the encoded material id (`label`) as a feature column.
df_features = df_features.assign(material=label)


In [17]:
# Encode the waveform type with its own encoder. Re-fitting the shared
# `encoder` here would overwrite the material classes it was fitted on, so the
# material ids could no longer be mapped back to their names.
waveform_encoder = LabelEncoder()
wave_label = waveform_encoder.fit_transform(df['waveform'])
wave_label

array([2, 2, 2, ..., 1, 1, 1])

In [18]:
# Add the operating conditions, the encoded waveform type and the target
# column to the feature frame, then preview the first two rows.
df_features = df_features.assign(
    temperature=df['temperature'],
    frequency=df['frequency'],
    wave_lable=wave_label,
    core_loss=df['core_loss'],
)
df_features.head(2)

Unnamed: 0,mean_flux,std_flux,max_flux,min_flux,range_flux,skewness,symmetric_messure,max_freq,mean_mag,std_mag,...,slope_std,slope_max,slope_min,slope_abs_mean,duty_cicle,material,temperature,frequency,wave_lable,core_loss
0,-4.003908e-11,0.0204,0.028849,-0.02884,0.057689,-0.000627,0.036719,48.857422,0.028984,0.652162,...,0.000125,0.000177,-0.000177,0.000113,50.0,0,25,50030,2,1998.0
1,-2.539062e-11,0.022223,0.031419,-0.031427,0.062846,-0.001222,0.039992,48.847656,0.031583,0.710445,...,0.000136,0.000193,-0.000193,0.000123,50.097656,0,25,50020,2,2428.0


In [20]:
# Remove the target column so that only input features remain.
df_features = df_features.drop(columns=['core_loss'])


In [21]:
# Rescale every column of df_features to [-1, 1] with per-column min-max scaling.
col_min = df_features.min()
col_range = df_features.max() - col_min
# A constant column has zero range and would become NaN (0 / 0); divide by 1
# instead so such a column maps to -1, the lower bound of the target range.
col_range = col_range.where(col_range != 0, 1)
df_features_normalized = (df_features - col_min) / col_range * 2 - 1
df_features_normalized

Unnamed: 0,mean_flux,std_flux,max_flux,min_flux,range_flux,skewness,symmetric_messure,max_freq,mean_mag,std_mag,...,slope_mean,slope_std,slope_max,slope_min,slope_abs_mean,duty_cicle,material,temperature,frequency,wave_lable
0,-0.000124,-0.857452,-0.873468,0.871463,-0.872738,-0.249633,0.041179,-0.999823,-0.929511,-0.857257,...,0.565443,-0.910750,-0.956024,0.837115,-0.872635,0.142857,-1.0,-1.0,-0.999823,1.0
1,-0.000079,-0.839898,-0.856537,0.854389,-0.855733,-0.260879,0.051104,-0.999867,-0.919334,-0.839699,...,0.561084,-0.899198,-0.950689,0.820080,-0.855639,0.285714,-1.0,-1.0,-0.999867,1.0
2,-0.000003,-0.812135,-0.829429,0.827429,-0.828695,-0.247130,0.065986,-0.999867,-0.903269,-0.811932,...,0.555112,-0.880923,-0.943101,0.794005,-0.828609,0.142857,-1.0,-1.0,-0.999867,1.0
3,-0.000182,-0.781746,-0.799917,0.797648,-0.799044,-0.266703,0.083994,-0.999867,-0.885459,-0.781538,...,0.547577,-0.860923,-0.933858,0.764327,-0.798974,0.142857,-1.0,-1.0,-0.999867,1.0
4,0.000127,-0.747568,-0.766903,0.764255,-0.765836,-0.285153,0.103393,-0.999823,-0.865711,-0.747354,...,0.539587,-0.838431,-0.923896,0.730216,-0.765780,0.142857,-1.0,-1.0,-0.999823,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12395,-0.000155,-0.850094,-0.833827,0.836221,-0.835289,0.067505,0.036842,-0.338638,-0.865881,-0.850151,...,0.501244,-0.874864,-0.876665,0.888197,-0.835549,-0.285714,1.0,1.0,-0.338638,0.0
12396,-0.000021,-0.829376,-0.810944,0.813774,-0.812620,0.062649,0.047719,-0.338638,-0.847018,-0.829459,...,0.488939,-0.857613,-0.862100,0.874621,-0.812934,-0.285714,1.0,1.0,-0.338638,0.0
12397,-0.000242,-0.767636,-0.741549,0.745455,-0.743753,0.042720,0.080164,-0.338638,-0.794049,-0.767783,...,0.457284,-0.808052,-0.821520,0.836843,-0.744191,-0.285714,1.0,1.0,-0.338638,0.0
12398,0.000197,-0.732508,-0.701950,0.706212,-0.704326,0.007723,0.098576,-0.338638,-0.763886,-0.732690,...,0.438971,-0.779778,-0.798085,0.813014,-0.704837,-0.142857,1.0,1.0,-0.338638,0.0


In [22]:
# Regression target: the core loss of every sample, taken from the raw table.
y = df.loc[:, 'core_loss']
y

0          1998.0
1          2428.0
2          3333.0
3          4503.0
4          6063.0
           ...   
12395     20605.0
12396     26504.0
12397     46232.0
12398     61458.0
12399    107581.0
Name: core_loss, Length: 12400, dtype: float64

In [24]:
# Append the core loss as read from df (it is not rescaled) and write the
# combined table to disk without the index column.
df_features_normalized = df_features_normalized.assign(core_loss=df['core_loss'])
df_features_normalized.to_csv(
    'feature_and_loss.csv',
    index=False,
    encoding='utf-8-sig',
)

In [None]:
# Drop the target again so the frame holds input features only.
df_features_normalized = df_features_normalized.drop(columns='core_loss')

In [28]:
# Display the normalized feature frame (pandas renders a truncated view).
df_features_normalized

Unnamed: 0,mean_flux,std_flux,max_flux,min_flux,range_flux,skewness,symmetric_messure,max_freq,mean_mag,std_mag,...,slope_mean,slope_std,slope_max,slope_min,slope_abs_mean,duty_cicle,material,temperature,frequency,wave_lable
0,-0.000124,-0.857452,-0.873468,0.871463,-0.872738,-0.249633,0.041179,-0.999823,-0.929511,-0.857257,...,0.565443,-0.910750,-0.956024,0.837115,-0.872635,0.142857,-1.0,-1.0,-0.999823,1.0
1,-0.000079,-0.839898,-0.856537,0.854389,-0.855733,-0.260879,0.051104,-0.999867,-0.919334,-0.839699,...,0.561084,-0.899198,-0.950689,0.820080,-0.855639,0.285714,-1.0,-1.0,-0.999867,1.0
2,-0.000003,-0.812135,-0.829429,0.827429,-0.828695,-0.247130,0.065986,-0.999867,-0.903269,-0.811932,...,0.555112,-0.880923,-0.943101,0.794005,-0.828609,0.142857,-1.0,-1.0,-0.999867,1.0
3,-0.000182,-0.781746,-0.799917,0.797648,-0.799044,-0.266703,0.083994,-0.999867,-0.885459,-0.781538,...,0.547577,-0.860923,-0.933858,0.764327,-0.798974,0.142857,-1.0,-1.0,-0.999867,1.0
4,0.000127,-0.747568,-0.766903,0.764255,-0.765836,-0.285153,0.103393,-0.999823,-0.865711,-0.747354,...,0.539587,-0.838431,-0.923896,0.730216,-0.765780,0.142857,-1.0,-1.0,-0.999823,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12395,-0.000155,-0.850094,-0.833827,0.836221,-0.835289,0.067505,0.036842,-0.338638,-0.865881,-0.850151,...,0.501244,-0.874864,-0.876665,0.888197,-0.835549,-0.285714,1.0,1.0,-0.338638,0.0
12396,-0.000021,-0.829376,-0.810944,0.813774,-0.812620,0.062649,0.047719,-0.338638,-0.847018,-0.829459,...,0.488939,-0.857613,-0.862100,0.874621,-0.812934,-0.285714,1.0,1.0,-0.338638,0.0
12397,-0.000242,-0.767636,-0.741549,0.745455,-0.743753,0.042720,0.080164,-0.338638,-0.794049,-0.767783,...,0.457284,-0.808052,-0.821520,0.836843,-0.744191,-0.285714,1.0,1.0,-0.338638,0.0
12398,0.000197,-0.732508,-0.701950,0.706212,-0.704326,0.007723,0.098576,-0.338638,-0.763886,-0.732690,...,0.438971,-0.779778,-0.798085,0.813014,-0.704837,-0.142857,1.0,1.0,-0.338638,0.0


In [29]:
# Convert the normalized feature frame into a float32 tensor.
feature_matrix = df_features_normalized.to_numpy()
X = torch.tensor(feature_matrix, dtype=torch.float32)
X

tensor([[-1.2424e-04, -8.5745e-01, -8.7347e-01,  ..., -1.0000e+00,
         -9.9982e-01,  1.0000e+00],
        [-7.8788e-05, -8.3990e-01, -8.5654e-01,  ..., -1.0000e+00,
         -9.9987e-01,  1.0000e+00],
        [-3.0303e-06, -8.1213e-01, -8.2943e-01,  ..., -1.0000e+00,
         -9.9987e-01,  1.0000e+00],
        ...,
        [-2.4242e-04, -7.6764e-01, -7.4155e-01,  ...,  1.0000e+00,
         -3.3864e-01,  0.0000e+00],
        [ 1.9697e-04, -7.3251e-01, -7.0195e-01,  ...,  1.0000e+00,
         -3.3864e-01,  0.0000e+00],
        [-1.4545e-04, -6.4915e-01, -6.0811e-01,  ...,  1.0000e+00,
         -3.3864e-01,  0.0000e+00]])