In [1]:
import gc
import math
import os
import pathlib
import time

import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns

from tqdm import tqdm_notebook

from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import StratifiedKFold, KFold, RepeatedKFold, train_test_split

import lightgbm as lgb
import xgboost as xgb

import keras
from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import adam
from keras.callbacks import ModelCheckpoint

Using TensorFlow backend.


In [2]:
%matplotlib inline

In [76]:
#np.random.randint?

In [54]:
def features_maker(df, first_index=None, last_index=None, smootch_windows_size = (3, 5, 7)):
    if first_index == None or last_index == None:
        first_index = 0
        last_index = df.shape[0] - 1

    smooth_feature_names = ['smootch_feature_{}_ws_{}'.format(i, window_size) for i, window_size in enumerate(smootch_windows_size)]
    for feature_name in smooth_feature_names:
        df[feature_name] = 0
    for i in df.index:
        for smooth_feature_name, window_size in zip(smooth_feature_names, smootch_windows_size):
            half_window_size = window_size // 2
            data_series = df['acoustic_data']
            if i < first_index + half_window_size:
                smooth_feature_value = data_series.iloc[first_index:first_index + window_size].mean()
            elif i < last_index - half_window_size:
                smooth_feature_value = data_series.iloc[last_index - window_size:last_index].mean()
            else:
                smooth_feature_value = data_series.iloc[i - half_window_size:i + half_window_size].mean()
            df.iloc[i][feature_name] = data_series[i] - smooth_feature_value
    return df

In [161]:
def add_features(
        df,
        first_index=None,
        last_index=None,
        sample_size=150000,
        holdout_size=50000,
        smootch_windows_size = (3, 5, 7)
    ):
    
    if first_index == None or last_index == None:
        first_index = df.index.min()
        last_index = df.index.max()
    sample_indexes = np.random.randint(first_index, last_index, sample_size)
    sample_indexes.sort()
        
    smootch_feature_names = ['smootch_feature_{}_ws_{}'.format(i, window_size) for i, window_size in enumerate(smootch_windows_size)]
    half_windows_size = [ws // 2 for ws in smootch_windows_size]
    print("half_windows_size:\n", half_windows_size)
    
    print("sample_indexes.shape:", sample_indexes.shape)
    print("sample_indexes[:5]", sample_indexes[:5])
    print("sample_indexes[-5:]", sample_indexes[-5:])
    print("df.index.min()", df.index.min())
    print("df.index.max()", df.index.max())
    
    
    print("before data_series")
    sample_df = df.iloc[sample_indexes]
    data_series = sample_df['acoustic_data']
    print("sample_df.index.tolist()[:5]:", sample_df.index.tolist()[:5])
    print("sample_df.index.tolist()[-5:]:", sample_df.index.tolist()[-5:])
    
    #data_series = df[sample_indexes]['acoustic_data']
    print("data_series.head()\n", data_series.head())
    
    #sample_df = df[sample_indexes]
    for feature_name in smootch_feature_names:
        sample_df[feature_name] = 0
    begin_smootch_features_value = []
    end_smootch_features_value = []
    for h_w_size, feature_name in zip(half_windows_size, smootch_feature_names):
        print("\n\n")
        print(feature_name, h_w_size)
        print("before begin and end indexes operations")
        begin_indexes = sample_indexes[:h_w_size]
        sample_begin_indexes = sample_indexes[:h_w_size]
        print("sample_begin_indexes:\n", sample_begin_indexes)
        full_data_begin_indexes = df.index[:h_w_size].tolist()
        print("full_data_begin_indexes:\n", full_data_begin_indexes)
        in_window_begin_indexes = [idx for idx in sample_begin_indexes if idx in full_data_begin_indexes]
        print("in_window_begin_indexes:\n", in_window_begin_indexes)
        
        sample_end_indexes = sample_indexes[-h_w_size:]
        print("sample_end_indexes:\n", sample_end_indexes)
        full_data_end_indexes = df.index[-h_w_size:].tolist()
        print("full_data_end_indexes:\n", full_data_end_indexes)
        in_window_end_indexes = [idx for idx in sample_end_indexes if idx in full_data_end_indexes]
        print("in_window_end_indexes:\n", in_window_end_indexes)
        
        print("\n")
        print("begin_indexes <= h_w_size:\n", begin_indexes <= h_w_size)
        #begin_indexes = begin_indexes[begin_indexes <= h_w_size]
        begin_indexes = begin_indexes[begin_indexes <= h_w_size]
        
        end_indexes = sample_indexes[-h_w_size:]
        print("end_indexes >= end_indexes.max() - h_w_size:\n", end_indexes >= end_indexes.max() - h_w_size)
        print("end_indexes:\n", end_indexes)
        end_indexes = end_indexes[end_indexes >= end_indexes.max() - h_w_size]
        print("begin_idexes:\n", begin_indexes)
        print("end_idexes:\n", end_indexes)
        print("end_indexes.max():", end_indexes.max())
        print("h_w_size:", h_w_size)
        '''
        if begin_indexes:
            print("if begin_indexes")
            for i, b_idx in enumerate(begin_indexes):
                begin_smootch_features_value.append(df[b_idx]['acoustic_data'] - df.iloc[first_index:first_index + window_size]['acoustic_data'].mean())
                sample_df.iloc[i][feature_name] = df[b_idx]['acoustic_data'] - df.iloc[first_index:first_index + window_size]['acoustic_data'].mean()
        if end_indexes:
            print("if end_indexes")
            for i, e_idx in enumerate(end_indexes):
                end_smootch_features_value.append(df[e_idx]['acoustic_data'] - df.iloc[last_index - window_size:last_index]['acoustic_data'].mean())
                sample_df.iloc[sample_df.shape[0] - i][feature_name] = df[e_idx]['acoustic_data'] - df.iloc[first_index:first_index + window_size]['acoustic_data'].mean()
        '''
    return sample_df

In [240]:
def add_features(
        df,
        first_index=None,
        last_index=None,
        sample_size=150000,
        holdout_size=50000,
        smootch_windows_size = (3, 5, 7)
    ):
    
    if first_index == None or last_index == None:
        first_index = df.index.min()
        last_index = df.index.max()
    sample_indexes = np.random.randint(first_index, last_index, sample_size)
    sample_indexes.sort()
        
    #smootch_feature_names = ['smootch_feature_{}_ws_{}'.format(i, window_size) for i, window_size in enumerate(smootch_windows_size)]
    smootch_feature_names = ['smootch_mean_ws_{}'.format(window_size) for window_size in smootch_windows_size]
    half_windows_size = [ws // 2 for ws in smootch_windows_size]
    print("half_windows_size:\n", half_windows_size)
    
    print("sample_indexes.shape:", sample_indexes.shape)
    print("sample_indexes[:5]", sample_indexes[:5])
    print("sample_indexes[-5:]", sample_indexes[-5:])
    print("df.index.min()", df.index.min())
    print("df.index.max()", df.index.max())
    
    acoustic_data_series = df['acoustic_data']
    
    print("before data_series")
    sample_df = df.iloc[sample_indexes]
    #sample_df = sample_df.reset_index().drop('index')
    sample_df.reset_index(inplace=True)
    #sample_df = sample_df[sample_df.columns.drop(['index'])]
    sample_df.drop(columns=['index'], inplace=True)
    print("index in sample_df.columns:", 'index' in sample_df.columns)
    #sample_df.drop('index', inplace=True)
    print("sample_df.index after reset:\n", sample_df.index)
    
    print("sample_df.index[:5]:", sample_df.index[:5])
    print("sample_df.index[-5:]:", sample_df.index[-5:])
    
    #data_series = df[sample_indexes]['acoustic_data']
    print("acoustic_data_series.head()\n", acoustic_data_series.head())
    
    #sample_df = df[sample_indexes]
    for feature_name in smootch_feature_names:
        sample_df[feature_name] = 0
    begin_smootch_features_value = []
    end_smootch_features_value = []
    
    sample_indexes_set = set(sample_indexes)
    begin_indexes_set = set()
    end_indexes_set = set()
    
    for h_w_size, feature_name in zip(half_windows_size, smootch_feature_names):
        print("\n\n")
        print(feature_name, h_w_size)
        print("before begin and end indexes operations")
        begin_indexes = sample_indexes[:h_w_size]
        
        sample_begin_indexes = sample_indexes[:h_w_size]
        #sample_begin_indexes = [0, 1, 2]
        print("sample_begin_indexes:\n", sample_begin_indexes)
        full_data_begin_indexes = df.index[:h_w_size].tolist()
        #full_data_begin_indexes = [0, 1, 2]
        print("full_data_begin_indexes:\n", full_data_begin_indexes)
        in_window_begin_indexes = [idx for idx in sample_begin_indexes if idx in full_data_begin_indexes]
        print("in_window_begin_indexes:\n", in_window_begin_indexes)
        
        sample_end_indexes = sample_indexes[-h_w_size:]
        #sample_end_indexes = [44429301, 44429302, 44429303]
        print("sample_end_indexes:\n", sample_end_indexes)
        full_data_end_indexes = df.index[-h_w_size:].tolist()
        print("full_data_end_indexes:\n", full_data_end_indexes)
        in_window_end_indexes = [idx for idx in sample_end_indexes if idx in full_data_end_indexes]
        print("in_window_end_indexes:\n", in_window_end_indexes)
        
        print("\n")
        
        #begin_indexes = begin_indexes[begin_indexes <= h_w_size]
        begin_indexes = begin_indexes[begin_indexes <= h_w_size]
        
        end_indexes = sample_indexes[-h_w_size:]
        
        if in_window_begin_indexes:
            begin_indexes_set.union(set(in_window_begin_indexes))
            print("if begin_indexes")
            for b_idx in in_window_begin_indexes:
                sample_df[feature_name].iloc[b_idx] = df[b_idx]['acoustic_data'] - df.iloc[first_index:first_index + window_size]['acoustic_data'].mean()
        if in_window_end_indexes:
            end_indexes_set.union(set(in_window_end_indexes))
            print("if end_indexes")
            for e_idx in in_window_end_indexes:
                sample_df[feature_name].iloc[e_idx] = df[e_idx]['acoustic_data'] - df.iloc[last_index - window_size:]['acoustic_data'].mean()
    print("begin_indexes_set:", begin_indexes_set)
    print("end_indexes_set:", end_indexes_set)
    sample_indexes_set.difference(begin_indexes_set)
    sample_indexes_set.difference(end_indexes_set)
    sorted_sample_indexes = sorted(tuple(sample_indexes_set))
    #for i in sorted(tuple(sample_indexes_set)):
    for i in range(sample_df.shape[0]):
        print("i:", i)
        print("i in sample_df.index", i in sample_df.index)
        sample_idx = sorted_sample_indexes[i]
        temp_value = 0
        for feature_name, half_window_size in zip(smootch_feature_names, half_windows_size):
            #half_window_size = window_size // 2
            #data_series = df['acoustic_data']
            
            feature_value = acoustic_data_series.iloc[sample_idx - half_window_size:sample_idx + half_window_size].mean()
            #temp_value = acoustic_data_series.iloc[sample_idx] - feature_value
            temp_value = sample_df.iloc[i]['acoustic_data'] - feature_value
            print("temp_value:", temp_value)
            sample_df[feature_name].iloc[i] = temp_value
            #print("feature_value:", feature_value)
            #acoustic_data_series.iloc[i]
            '''
            try:
                temp_value = acoustic_data_series.iloc[i] - feature_value
            except IndexError as ire:
                print("i:", i)
                print("IndexError as ire:\n", ire)
            sample_df.iloc[i][feature_name] = temp_value
            '''
    return sample_df

In [253]:
def add_features(
        df,
        first_index=None,
        last_index=None,
        sample_size=150000,
        holdout_size=50000,
        smootch_windows_size = (3, 5, 7)
    ):
    
    if first_index == None or last_index == None:
        first_index = df.index.min()
        last_index = df.index.max()
    sample_indexes = np.random.randint(first_index, last_index, sample_size)
    sample_indexes.sort()
        
    smootch_feature_names = ['smootch_mean_ws_{}'.format(window_size) for window_size in smootch_windows_size]
    half_windows_size = [ws // 2 for ws in smootch_windows_size]

    acoustic_data_series = df['acoustic_data']
    
    sample_df = df.iloc[sample_indexes]
    sample_df.reset_index(inplace=True)
    sample_df.drop(columns=['index'], inplace=True)
    
    for feature_name in smootch_feature_names:
        sample_df[feature_name] = 0
    begin_smootch_features_value = []
    end_smootch_features_value = []
    
    sample_indexes_set = set(sample_indexes)
    begin_indexes_set = set()
    end_indexes_set = set()
    
    for h_w_size, feature_name in zip(half_windows_size, smootch_feature_names):
        begin_indexes = sample_indexes[:h_w_size]        
        sample_begin_indexes = sample_indexes[:h_w_size]
        full_data_begin_indexes = df.index[:h_w_size].tolist()

        in_window_begin_indexes = [idx for idx in sample_begin_indexes if idx in full_data_begin_indexes]
        
        sample_end_indexes = sample_indexes[-h_w_size:]

        full_data_end_indexes = df.index[-h_w_size:].tolist()
        
        in_window_end_indexes = [idx for idx in sample_end_indexes if idx in full_data_end_indexes]
        begin_indexes = begin_indexes[begin_indexes <= h_w_size]
        
        end_indexes = sample_indexes[-h_w_size:]
        
        if in_window_begin_indexes:
            begin_indexes_set.union(set(in_window_begin_indexes))
            for b_idx in in_window_begin_indexes:
                sample_df[feature_name].iloc[b_idx] = df[b_idx]['acoustic_data'] - df.iloc[first_index:first_index + window_size]['acoustic_data'].mean()
        if in_window_end_indexes:
            end_indexes_set.union(set(in_window_end_indexes))
            for e_idx in in_window_end_indexes:
                sample_df[feature_name].iloc[e_idx] = df[e_idx]['acoustic_data'] - df.iloc[last_index - window_size:]['acoustic_data'].mean()
    #sample_indexes_set.difference(begin_indexes_set)
    #sample_indexes_set.difference(end_indexes_set)
    sorted_sample_indexes = sorted(tuple(sample_indexes_set))
    start_time = time.time()
    
    for i in range(sample_df.shape[0])[len(begin_indexes_set):sample_df.shape[0] - len(end_indexes_set)]:
        #sample_idx = sorted_sample_indexes[i]
        sample_idx = sample_indexes[i]
        temp_value = 0
        for feature_name, half_window_size in zip(smootch_feature_names, half_windows_size):
            
            feature_value = acoustic_data_series.iloc[sample_idx - half_window_size:sample_idx + half_window_size].mean()
            #temp_value = sample_df.iloc[i]['acoustic_data'] - feature_value
            #sample_df[feature_name].iloc[i] = temp_value
    print("Full calculation feature value time (with slicing) {} min:".format((time.time() - start_time) / 60))
    return sample_df

In [None]:
def add_features(
        df,
        first_index=None,
        last_index=None,
        sample_size=150000,
        holdout_size=50000,
        smootch_windows_size = (3, 5, 7)
    ):
    
    if first_index == None or last_index == None:
        first_index = df.index.min()
        last_index = df.index.max()
    sample_indexes = np.random.randint(first_index, last_index, sample_size)
    sample_indexes.sort()
        
    smootch_feature_names = ['smootch_mean_ws_{}'.format(window_size) for window_size in smootch_windows_size]
    half_windows_size = [ws // 2 for ws in smootch_windows_size]

    acoustic_data_series = df['acoustic_data']
    
    sample_df = df.iloc[sample_indexes]
    sample_df.reset_index(inplace=True)
    sample_df.drop(columns=['index'], inplace=True)
    
    for feature_name in smootch_feature_names:
        sample_df[feature_name] = 0
    begin_smootch_features_value = []
    end_smootch_features_value = []
    
    #sample_indexes_set = set(sample_indexes)
    begin_indexes_set = set()
    end_indexes_set = set()
    
    start_time = time.time()
    
    for window_size, feature_name in zip(smootch_windows_size, smootch_feature_names):
        
        feature_values_list = list(range(sample_size))
        
        half_window_size = window_size // 2
        begin_indexes = sample_indexes[:half_window_size]        
        sample_begin_indexes = sample_indexes[:h_w_size]
        full_data_begin_indexes = df.index[:half_window_size].tolist()

        in_window_begin_indexes = [idx for idx in sample_begin_indexes if idx in full_data_begin_indexes]
        
        sample_end_indexes = sample_indexes[-half_window_size:]

        full_data_end_indexes = df.index[-half_window_size:].tolist()
        
        in_window_end_indexes = [idx for idx in sample_end_indexes if idx in full_data_end_indexes]
        begin_indexes = begin_indexes[begin_indexes <= half_window_size]
        
        end_indexes = sample_indexes[-half_window_size:]
        
        if in_window_begin_indexes:
            begin_indexes_set.union(set(in_window_begin_indexes))
            for i, b_idx in enumerate(in_window_begin_indexes):
                #sample_df[feature_name].iloc[b_idx] = df[b_idx]['acoustic_data'] - df.iloc[first_index:first_index + window_size]['acoustic_data'].mean()
                feature_values_list[i] = df[b_idx]['acoustic_data'] - df.iloc[first_index:first_index + window_size]['acoustic_data'].mean()
        if in_window_end_indexes:
            end_indexes_set.union(set(in_window_end_indexes))
            for i, e_idx in enumerat(in_window_end_indexes):
                #sample_df[feature_name].iloc[e_idx] = df[e_idx]['acoustic_data'] - df.iloc[last_index - window_size:]['acoustic_data'].mean()
                feature_values_list[-i] = df[e_idx]['acoustic_data'] - df.iloc[last_index - window_size:]['acoustic_data'].mean()
        for i in range(sample_df.shape[0])[len(begin_indexes_set):sample_df.shape[0] - len(end_indexes_set)]:
            sample_idx = sample_indexes[i]
            feature_value = acoustic_data_series.iloc[sample_idx - half_window_size:sample_idx + half_window_size].mean()
            feature_values_list[i] = feature_value
            #temp_value = sample_df.iloc[i]['acoustic_data'] - feature_value
            #sample_df[feature_name].iloc[i] = temp_value
    print("Full calculation feature value time (with slicing) {} min:".format((time.time() - start_time) / 60))
    return sample_df

In [212]:
#earthquake_1_df.reset_index?

In [184]:
earthquake_margin_indexes =[
    5656573,
    50085877,
    104677355,
    138772452,
    187641819,
    218652629,
    245829584,
    307838916,
    338276286,
    375377847,
    419368879,
    461811622,
    495800224,
    528777114,
    585568143,
    621985672
]

In [8]:
earthquakes_length = [earthquake_margin_indexes[i + 1] - earthquake_margin_indexes[i] for i in range(len(earthquake_margin_indexes) - 1)]

In [9]:
earthquakes_length

[44429304,
 54591478,
 34095097,
 48869367,
 31010810,
 27176955,
 62009332,
 30437370,
 37101561,
 43991032,
 42442743,
 33988602,
 32976890,
 56791029,
 36417529]

In [10]:
621985672 - 585568143

36417529

In [11]:
50085877 - 5656573

44429304

In [32]:
complete_earthquakes_length = earthquakes_length[:-1]

In [15]:
#complete_earthquaces_length = complete_earthquaces_length[:-1]

In [33]:
complete_earthquakes_length

[44429304,
 54591478,
 34095097,
 48869367,
 31010810,
 27176955,
 62009332,
 30437370,
 37101561,
 43991032,
 42442743,
 33988602,
 32976890,
 56791029]

In [34]:
len(complete_earthquakes_length)

14

In [36]:
earthquakes_length_more_2_less_3 = [length for length in complete_earthquaces_length if length > 20000000 and length < 30000000]

In [37]:
earthquakes_length_more_3_less_4 = [length for length in complete_earthquaces_length if length > 30000000 and length < 40000000]

In [38]:
earthquakes_length_more_4_less_5 = [length for length in complete_earthquaces_length if length > 40000000 and length < 50000000]

In [39]:
earthquakes_length_more_5_less_6 = [length for length in complete_earthquaces_length if length > 50000000 and length < 60000000]

In [40]:
earthquakes_length_more_6 = [length for length in complete_earthquaces_length if length > 60000000]

In [41]:
earthquakes_length_more_3_less_4

[34095097, 31010810, 30437370, 37101561, 33988602, 32976890]

In [42]:
earthquakes_length_more_2_less_3

[27176955]

In [43]:
earthquakes_length_more_3_less_4

[34095097, 31010810, 30437370, 37101561, 33988602, 32976890]

In [44]:
earthquakes_length_more_4_less_5

[44429304, 48869367, 43991032, 42442743]

In [45]:
earthquakes_length_more_5_less_6

[54591478, 56791029]

In [46]:
earthquakes_length_more_6

[62009332]

In [92]:
%time
earthquake_1_df = pd.read_csv(
    '../input/train/train.csv',
    #nrows=100000000,
    names=['acoustic_data', 'time_to_failure'],
    dtype={'acoustic_data': np.float32, 'time_to_failure': np.float32},
    skiprows=earthquake_margin_indexes[0],
    nrows=complete_earthquakes_length[0]
)

CPU times: user 0 ns, sys: 0 ns, total: 0 ns
Wall time: 6.68 µs


In [49]:
earthquake_1_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 44429304 entries, 0 to 44429303
Data columns (total 2 columns):
acoustic_data      float32
time_to_failure    float32
dtypes: float32(2)
memory usage: 339.0 MB


%%time
earthquake_1_with_additional_features_df = features_maker(earthquake_1_df)

In [56]:
#np.random.randint?

In [255]:
%%time
earthquake_1_with_additional_features_df = add_features(earthquake_1_df, sample_size=150000)

Full calculation feature value time (with slicing) 1.7701443672180175 min:
CPU times: user 1min 46s, sys: 184 ms, total: 1min 46s
Wall time: 1min 46s


In [244]:
print(earthquake_1_with_additional_features_df[:10])
print()
print(earthquake_1_with_additional_features_df[-10:])

   acoustic_data  time_to_failure  smootch_mean_ws_3  smootch_mean_ws_5  \
0           -1.0        11.340696                3.5               2.75   
1            6.0        11.207697                1.0               2.00   
2            4.0        10.928999                1.0               1.00   
3            6.0        10.895998                0.5               0.75   
4            2.0        10.537398               -1.0              -1.00   
5            1.0        10.463996               -1.0              -1.25   
6            4.0        10.274598                1.0               0.00   
7           -1.0        10.246897               -2.0              -4.25   
8           16.0        10.245899                3.0               5.25   
9            7.0        10.135198                0.0               1.50   

   smootch_mean_ws_7  
0           1.000000  
1           2.166667  
2           0.333333  
3           1.166667  
4          -0.333333  
5          -1.000000  
6          -0

In [69]:
earthquake_1_df.index.min()

0

In [70]:
earthquake_1_df.index.max()

44429303

In [83]:
earthquake_1_df.iloc[522]

acoustic_data             2.000000
time_to_failure          11.540799
smooth_feature_0_ws_3     0.000000
smooth_feature_1_ws_5     0.000000
smooth_feature_2_ws_7     0.000000
Name: 522, dtype: float64

In [129]:
earthquake_1_df.head()

Unnamed: 0,acoustic_data,time_to_failure
0,5.0,0.000795
1,4.0,0.000795
2,4.0,11.5408
3,5.0,11.5408
4,6.0,11.5408


In [201]:
earthquake_1_with_additional_features_df.head()

Unnamed: 0,index,acoustic_data,time_to_failure,smootch_mean_ws_3,smootch_mean_ws_5,smootch_mean_ws_7
0,17415,6.0,11.536499,0,0,0
1,1314745,7.0,11.200295,0,0,0
2,1456795,-2.0,11.162997,0,0,0
3,2023502,4.0,11.0152,0,0,0
4,2381435,6.0,10.922598,0,0,0


In [144]:
e_1_idx = earthquake_1_df.index

In [145]:
150590 in e_1_idx

True

In [146]:
e_1_acoustic_series = earthquake_1_df['acoustic_data']

In [147]:
e_1_acoustic_series.shape

(44429304,)

In [148]:
e_1_as_idx = e_1_acoustic_series.index

In [149]:
e_1_as_idx.shape

(44429304,)

In [150]:
150590 in e_1_as_idx

True

In [151]:
e_1_acoustic_series[150590]

7.0

In [169]:
earthquake_1_df.reset_index?

In [209]:
earthquake_1_with_additional_features_copy_df = earthquake_1_with_additional_features_df.copy()

In [210]:
earthquake_1_with_additional_features_copy_df.head()

Unnamed: 0,index,acoustic_data,time_to_failure,smootch_mean_ws_3,smootch_mean_ws_5,smootch_mean_ws_7
0,100820,2.0,11.515197,0,0,0
1,1557602,3.0,11.136398,0,0,0
2,1966300,5.0,11.03,0,0,0
3,2055354,4.0,11.007696,0,0,0
4,2310165,1.0,10.9407,0,0,0


In [206]:
earthquake_1_with_additional_features_copy_df.columns.drop?

In [219]:
df = pd.DataFrame({'a': [0, 0, 0], 'b': [1, 1, 1]})

In [220]:
df

Unnamed: 0,a,b
0,0,1
1,0,1
2,0,1


In [221]:
df.iloc[1]['a'] = 2

In [222]:
df

Unnamed: 0,a,b
0,0,1
1,2,1
2,0,1


In [223]:
df['c'] = 3

In [224]:
df

Unnamed: 0,a,b,c
0,0,1,3
1,2,1,3
2,0,1,3


In [225]:
df.iloc[2]['c'] = 0

In [226]:
df

Unnamed: 0,a,b,c
0,0,1,3
1,2,1,3
2,0,1,3


In [228]:
df['c'].iloc[2] = 0

In [229]:
df

Unnamed: 0,a,b,c
0,0,1,3
1,2,1,3
2,0,1,0


In [230]:
df.iloc[0]['c'] = 0

In [231]:
df

Unnamed: 0,a,b,c
0,0,1,0
1,2,1,3
2,0,1,0


In [232]:
df1 = pd.DataFrame({'a': [1, 1, 1], 'b': [2, 2, 2]})

In [233]:
df1

Unnamed: 0,a,b
0,1,2
1,1,2
2,1,2


In [236]:
df1['c'] = 0

In [237]:
df1

Unnamed: 0,a,b,c
0,1,2,0
1,1,2,0
2,1,2,0


In [238]:
df1['c'].iloc[0] = 4

In [239]:
df1

Unnamed: 0,a,b,c
0,1,2,4
1,1,2,0
2,1,2,0
