In [1]:
# import packages for data manipulation
import pandas as pd
import numpy as np

# Plotting libraries
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
import janitor

from tsfresh import extract_features
from tsfresh.utilities.dataframe_functions import make_forecasting_frame, roll_time_series
from sklearn.ensemble import AdaBoostRegressor
from tsfresh.utilities.dataframe_functions import impute


In [2]:
# Load the avocado dataset from the CSV file one directory above the notebook.
df = pd.read_csv('../avocado.csv')

In [3]:
# Clean the raw frame in one chain with a single assignment: if any step
# raises, `df` keeps its previous value instead of being left half-modified.
df = (
    # Removing index column
    df.drop(columns='Unnamed: 0')
    # Removing records with TotalUS region, assuming it is just the average of all other regions
    .loc[lambda frame: frame.region != 'TotalUS']
    .reset_index(drop=True)
    # Converting Date to datetime and sorting chronologically within each region
    .assign(Date=lambda frame: pd.to_datetime(frame['Date']))
    .sort_values(['region', 'Date'])
    # Column-name clean-up provided by the `janitor` import
    .clean_names()
)

In [4]:
# Keep only the rows whose `type` column equals 'conventional'.
# NOTE(review): `df_conventional` is not referenced by any other cell visible
# in this notebook -- confirm it is still needed.
df_conventional = df.loc[df.type=='conventional']

In [5]:
def tsfresh_feature_extraction(region, avocado_type, max_timeshift=52):
    """Build rolling-window tsfresh features and targets for one price series.

    Selects the rows of the notebook-level ``df`` whose ``region`` and ``type``
    match the arguments, turns their ``averageprice`` column into a
    forecasting frame and extracts tsfresh features from it.

    Parameters
    ----------
    region
        Value compared against ``df.region`` to select the series.
    avocado_type
        Value compared against ``df.type`` to select the series.
    max_timeshift : int, default 52
        Forwarded to ``make_forecasting_frame`` as ``max_timeshift``.

    Returns
    -------
    X : pandas.DataFrame
        Extracted features (columns with exactly one distinct value removed)
        plus ``region``, ``date`` and ``type`` columns; the first row is
        dropped and the index is reset to 0..n-1.
    y : pandas.Series
        Targets returned by ``make_forecasting_frame`` with the first entry
        dropped and the index reset to 0..n-1, matching ``X`` by position.
    """
    series = df.loc[(df.region == region) & (df.type == avocado_type)].reset_index(drop=True)
    df_shift, y = make_forecasting_frame(
        series.averageprice,
        kind="price",
        max_timeshift=max_timeshift,
        rolling_direction=1,
    )
    X = extract_features(
        df_shift,
        column_id="id",
        column_sort="time",
        column_value="value",
        impute_function=impute,
        show_warnings=False,
    )
    # Keep only the features whose number of distinct values is not 1.
    X = X.loc[:, X.nunique() != 1]
    print(series.shape, X.shape)
    X["region"] = region
    # This assignment aligns on index labels.
    # NOTE(review): assumes X is indexed by the same integer labels as
    # `series` -- confirm for the installed tsfresh version, otherwise the
    # dates silently become missing values.
    X['date'] = series.date
    X['type'] = avocado_type
    # Discard the first feature row and the first target.
    # NOTE(review): presumably because the first window is too short to be
    # informative -- confirm the intent.
    X = X.iloc[1:].reset_index(drop=True)
    # Reset y as well so the returned pair shares the same 0..n-1 index.
    y = y.iloc[1:].reset_index(drop=True)
    return X, y

In [None]:
# Collect one (features, targets) pair per (type, region) series.
X = []
y = []
for avocado_type in ['conventional', 'organic']:
    # Iterate regions in sorted order: a bare set() has no stable ordering
    # across kernel sessions, which made the row order of the stacked result
    # non-reproducible.
    for region in sorted(df.region.unique()):
        x_temp, y_temp = tsfresh_feature_extraction(region, avocado_type)
        X.append(x_temp)
        y.append(y_temp)

Feature Extraction: 100%|██████████| 19/19 [00:04<00:00,  4.31it/s]
 'value__fft_coefficient__coeff_27__attr_"angle"'
 'value__fft_coefficient__coeff_27__attr_"imag"'
 'value__fft_coefficient__coeff_27__attr_"real"'
 'value__fft_coefficient__coeff_28__attr_"abs"'
 'value__fft_coefficient__coeff_28__attr_"angle"'
 'value__fft_coefficient__coeff_28__attr_"imag"'
 'value__fft_coefficient__coeff_28__attr_"real"'
 'value__fft_coefficient__coeff_29__attr_"abs"'
 'value__fft_coefficient__coeff_29__attr_"angle"'
 'value__fft_coefficient__coeff_29__attr_"imag"'
 'value__fft_coefficient__coeff_29__attr_"real"'
 'value__fft_coefficient__coeff_30__attr_"abs"'
 'value__fft_coefficient__coeff_30__attr_"angle"'
 'value__fft_coefficient__coeff_30__attr_"imag"'
 'value__fft_coefficient__coeff_30__attr_"real"'
 'value__fft_coefficient__coeff_31__attr_"abs"'
 'value__fft_coefficient__coeff_31__attr_"angle"'
 'value__fft_coefficient__coeff_31__attr_"imag"'
 'value__fft_coefficient__coeff_31__attr_"real"'


(169, 13) (168, 431)


Feature Extraction: 100%|██████████| 19/19 [00:04<00:00,  4.35it/s]
 'value__fft_coefficient__coeff_27__attr_"angle"'
 'value__fft_coefficient__coeff_27__attr_"imag"'
 'value__fft_coefficient__coeff_27__attr_"real"'
 'value__fft_coefficient__coeff_28__attr_"abs"'
 'value__fft_coefficient__coeff_28__attr_"angle"'
 'value__fft_coefficient__coeff_28__attr_"imag"'
 'value__fft_coefficient__coeff_28__attr_"real"'
 'value__fft_coefficient__coeff_29__attr_"abs"'
 'value__fft_coefficient__coeff_29__attr_"angle"'
 'value__fft_coefficient__coeff_29__attr_"imag"'
 'value__fft_coefficient__coeff_29__attr_"real"'
 'value__fft_coefficient__coeff_30__attr_"abs"'
 'value__fft_coefficient__coeff_30__attr_"angle"'
 'value__fft_coefficient__coeff_30__attr_"imag"'
 'value__fft_coefficient__coeff_30__attr_"real"'
 'value__fft_coefficient__coeff_31__attr_"abs"'
 'value__fft_coefficient__coeff_31__attr_"angle"'
 'value__fft_coefficient__coeff_31__attr_"imag"'
 'value__fft_coefficient__coeff_31__attr_"real"'


(169, 13) (168, 431)


Feature Extraction: 100%|██████████| 19/19 [00:04<00:00,  3.86it/s]
 'value__fft_coefficient__coeff_27__attr_"angle"'
 'value__fft_coefficient__coeff_27__attr_"imag"'
 'value__fft_coefficient__coeff_27__attr_"real"'
 'value__fft_coefficient__coeff_28__attr_"abs"'
 'value__fft_coefficient__coeff_28__attr_"angle"'
 'value__fft_coefficient__coeff_28__attr_"imag"'
 'value__fft_coefficient__coeff_28__attr_"real"'
 'value__fft_coefficient__coeff_29__attr_"abs"'
 'value__fft_coefficient__coeff_29__attr_"angle"'
 'value__fft_coefficient__coeff_29__attr_"imag"'
 'value__fft_coefficient__coeff_29__attr_"real"'
 'value__fft_coefficient__coeff_30__attr_"abs"'
 'value__fft_coefficient__coeff_30__attr_"angle"'
 'value__fft_coefficient__coeff_30__attr_"imag"'
 'value__fft_coefficient__coeff_30__attr_"real"'
 'value__fft_coefficient__coeff_31__attr_"abs"'
 'value__fft_coefficient__coeff_31__attr_"angle"'
 'value__fft_coefficient__coeff_31__attr_"imag"'
 'value__fft_coefficient__coeff_31__attr_"real"'


(169, 13) (168, 430)


Feature Extraction: 100%|██████████| 19/19 [00:05<00:00,  3.76it/s]
 'value__fft_coefficient__coeff_27__attr_"angle"'
 'value__fft_coefficient__coeff_27__attr_"imag"'
 'value__fft_coefficient__coeff_27__attr_"real"'
 'value__fft_coefficient__coeff_28__attr_"abs"'
 'value__fft_coefficient__coeff_28__attr_"angle"'
 'value__fft_coefficient__coeff_28__attr_"imag"'
 'value__fft_coefficient__coeff_28__attr_"real"'
 'value__fft_coefficient__coeff_29__attr_"abs"'
 'value__fft_coefficient__coeff_29__attr_"angle"'
 'value__fft_coefficient__coeff_29__attr_"imag"'
 'value__fft_coefficient__coeff_29__attr_"real"'
 'value__fft_coefficient__coeff_30__attr_"abs"'
 'value__fft_coefficient__coeff_30__attr_"angle"'
 'value__fft_coefficient__coeff_30__attr_"imag"'
 'value__fft_coefficient__coeff_30__attr_"real"'
 'value__fft_coefficient__coeff_31__attr_"abs"'
 'value__fft_coefficient__coeff_31__attr_"angle"'
 'value__fft_coefficient__coeff_31__attr_"imag"'
 'value__fft_coefficient__coeff_31__attr_"real"'


(169, 13) (168, 432)


Feature Extraction: 100%|██████████| 19/19 [00:04<00:00,  4.05it/s]
 'value__fft_coefficient__coeff_27__attr_"angle"'
 'value__fft_coefficient__coeff_27__attr_"imag"'
 'value__fft_coefficient__coeff_27__attr_"real"'
 'value__fft_coefficient__coeff_28__attr_"abs"'
 'value__fft_coefficient__coeff_28__attr_"angle"'
 'value__fft_coefficient__coeff_28__attr_"imag"'
 'value__fft_coefficient__coeff_28__attr_"real"'
 'value__fft_coefficient__coeff_29__attr_"abs"'
 'value__fft_coefficient__coeff_29__attr_"angle"'
 'value__fft_coefficient__coeff_29__attr_"imag"'
 'value__fft_coefficient__coeff_29__attr_"real"'
 'value__fft_coefficient__coeff_30__attr_"abs"'
 'value__fft_coefficient__coeff_30__attr_"angle"'
 'value__fft_coefficient__coeff_30__attr_"imag"'
 'value__fft_coefficient__coeff_30__attr_"real"'
 'value__fft_coefficient__coeff_31__attr_"abs"'
 'value__fft_coefficient__coeff_31__attr_"angle"'
 'value__fft_coefficient__coeff_31__attr_"imag"'
 'value__fft_coefficient__coeff_31__attr_"real"'


(169, 13) (168, 431)


Feature Extraction: 100%|██████████| 19/19 [00:05<00:00,  3.23it/s]
 'value__fft_coefficient__coeff_27__attr_"angle"'
 'value__fft_coefficient__coeff_27__attr_"imag"'
 'value__fft_coefficient__coeff_27__attr_"real"'
 'value__fft_coefficient__coeff_28__attr_"abs"'
 'value__fft_coefficient__coeff_28__attr_"angle"'
 'value__fft_coefficient__coeff_28__attr_"imag"'
 'value__fft_coefficient__coeff_28__attr_"real"'
 'value__fft_coefficient__coeff_29__attr_"abs"'
 'value__fft_coefficient__coeff_29__attr_"angle"'
 'value__fft_coefficient__coeff_29__attr_"imag"'
 'value__fft_coefficient__coeff_29__attr_"real"'
 'value__fft_coefficient__coeff_30__attr_"abs"'
 'value__fft_coefficient__coeff_30__attr_"angle"'
 'value__fft_coefficient__coeff_30__attr_"imag"'
 'value__fft_coefficient__coeff_30__attr_"real"'
 'value__fft_coefficient__coeff_31__attr_"abs"'
 'value__fft_coefficient__coeff_31__attr_"angle"'
 'value__fft_coefficient__coeff_31__attr_"imag"'
 'value__fft_coefficient__coeff_31__attr_"real"'


(169, 13) (168, 431)


Feature Extraction: 100%|██████████| 19/19 [00:06<00:00,  2.85it/s]
 'value__fft_coefficient__coeff_27__attr_"angle"'
 'value__fft_coefficient__coeff_27__attr_"imag"'
 'value__fft_coefficient__coeff_27__attr_"real"'
 'value__fft_coefficient__coeff_28__attr_"abs"'
 'value__fft_coefficient__coeff_28__attr_"angle"'
 'value__fft_coefficient__coeff_28__attr_"imag"'
 'value__fft_coefficient__coeff_28__attr_"real"'
 'value__fft_coefficient__coeff_29__attr_"abs"'
 'value__fft_coefficient__coeff_29__attr_"angle"'
 'value__fft_coefficient__coeff_29__attr_"imag"'
 'value__fft_coefficient__coeff_29__attr_"real"'
 'value__fft_coefficient__coeff_30__attr_"abs"'
 'value__fft_coefficient__coeff_30__attr_"angle"'
 'value__fft_coefficient__coeff_30__attr_"imag"'
 'value__fft_coefficient__coeff_30__attr_"real"'
 'value__fft_coefficient__coeff_31__attr_"abs"'
 'value__fft_coefficient__coeff_31__attr_"angle"'
 'value__fft_coefficient__coeff_31__attr_"imag"'
 'value__fft_coefficient__coeff_31__attr_"real"'


(169, 13) (168, 431)


Feature Extraction: 100%|██████████| 19/19 [00:05<00:00,  3.34it/s]
 'value__fft_coefficient__coeff_27__attr_"angle"'
 'value__fft_coefficient__coeff_27__attr_"imag"'
 'value__fft_coefficient__coeff_27__attr_"real"'
 'value__fft_coefficient__coeff_28__attr_"abs"'
 'value__fft_coefficient__coeff_28__attr_"angle"'
 'value__fft_coefficient__coeff_28__attr_"imag"'
 'value__fft_coefficient__coeff_28__attr_"real"'
 'value__fft_coefficient__coeff_29__attr_"abs"'
 'value__fft_coefficient__coeff_29__attr_"angle"'
 'value__fft_coefficient__coeff_29__attr_"imag"'
 'value__fft_coefficient__coeff_29__attr_"real"'
 'value__fft_coefficient__coeff_30__attr_"abs"'
 'value__fft_coefficient__coeff_30__attr_"angle"'
 'value__fft_coefficient__coeff_30__attr_"imag"'
 'value__fft_coefficient__coeff_30__attr_"real"'
 'value__fft_coefficient__coeff_31__attr_"abs"'
 'value__fft_coefficient__coeff_31__attr_"angle"'
 'value__fft_coefficient__coeff_31__attr_"imag"'
 'value__fft_coefficient__coeff_31__attr_"real"'


(169, 13) (168, 432)


Feature Extraction: 100%|██████████| 19/19 [00:06<00:00,  2.80it/s]
 'value__fft_coefficient__coeff_27__attr_"angle"'
 'value__fft_coefficient__coeff_27__attr_"imag"'
 'value__fft_coefficient__coeff_27__attr_"real"'
 'value__fft_coefficient__coeff_28__attr_"abs"'
 'value__fft_coefficient__coeff_28__attr_"angle"'
 'value__fft_coefficient__coeff_28__attr_"imag"'
 'value__fft_coefficient__coeff_28__attr_"real"'
 'value__fft_coefficient__coeff_29__attr_"abs"'
 'value__fft_coefficient__coeff_29__attr_"angle"'
 'value__fft_coefficient__coeff_29__attr_"imag"'
 'value__fft_coefficient__coeff_29__attr_"real"'
 'value__fft_coefficient__coeff_30__attr_"abs"'
 'value__fft_coefficient__coeff_30__attr_"angle"'
 'value__fft_coefficient__coeff_30__attr_"imag"'
 'value__fft_coefficient__coeff_30__attr_"real"'
 'value__fft_coefficient__coeff_31__attr_"abs"'
 'value__fft_coefficient__coeff_31__attr_"angle"'
 'value__fft_coefficient__coeff_31__attr_"imag"'
 'value__fft_coefficient__coeff_31__attr_"real"'


(169, 13) (168, 432)


Feature Extraction: 100%|██████████| 19/19 [00:06<00:00,  2.91it/s]
 'value__fft_coefficient__coeff_27__attr_"angle"'
 'value__fft_coefficient__coeff_27__attr_"imag"'
 'value__fft_coefficient__coeff_27__attr_"real"'
 'value__fft_coefficient__coeff_28__attr_"abs"'
 'value__fft_coefficient__coeff_28__attr_"angle"'
 'value__fft_coefficient__coeff_28__attr_"imag"'
 'value__fft_coefficient__coeff_28__attr_"real"'
 'value__fft_coefficient__coeff_29__attr_"abs"'
 'value__fft_coefficient__coeff_29__attr_"angle"'
 'value__fft_coefficient__coeff_29__attr_"imag"'
 'value__fft_coefficient__coeff_29__attr_"real"'
 'value__fft_coefficient__coeff_30__attr_"abs"'
 'value__fft_coefficient__coeff_30__attr_"angle"'
 'value__fft_coefficient__coeff_30__attr_"imag"'
 'value__fft_coefficient__coeff_30__attr_"real"'
 'value__fft_coefficient__coeff_31__attr_"abs"'
 'value__fft_coefficient__coeff_31__attr_"angle"'
 'value__fft_coefficient__coeff_31__attr_"imag"'
 'value__fft_coefficient__coeff_31__attr_"real"'


(169, 13) (168, 432)


Feature Extraction:  16%|█▌        | 3/19 [00:01<00:11,  1.35it/s]

In [None]:
# Stack the per-series results. ignore_index=True gives both objects a fresh
# 0..n-1 index instead of repeated per-series labels, so row i of `train`
# lines up with entry i of `train_y` (provided every series contributed the
# same number of feature rows and targets).
train = pd.concat(X, ignore_index=True)
train_y = pd.concat(y, ignore_index=True)

In [None]:
# Display the shapes of the stacked features, the stacked targets and the source frame.
train.shape, train_y.shape, df.shape

In [None]:
# Preview the first rows of the stacked feature table.
train.head()

In [None]:
# Count the missing values in each column of the feature table.
train.isnull().sum()

In [None]:
# Replace the index with a fresh 0..n-1 range, discarding the old labels.
train = train.reset_index(drop=True)

In [None]:
import pickle

# Persist the feature table. The context manager guarantees the file handle
# is flushed and closed, which the bare open() call did not.
# NOTE(review): a later cell writes 'tsfresh_features.p' again after the
# columns with missing values are dropped, overwriting this snapshot --
# confirm both writes are intended.
with open('tsfresh_features.p', 'wb') as fh:
    pickle.dump(train, fh)

In [None]:
# Two-column table: one row per feature with its count of missing values.
na_df = (
    train.isnull()
    .sum()
    .rename_axis('feature')
    .reset_index(name='num_NA')
)

In [None]:
# Names of all features that have at least one missing value.
drop_features = na_df.loc[na_df['num_NA'] > 0, 'feature'].tolist()

In [None]:
# Remove the features that contain missing values. Rebinding (instead of
# inplace=True) together with errors='ignore' keeps this cell safe to re-run
# after the columns have already been removed.
train = train.drop(columns=drop_features, errors='ignore')

In [None]:
import pickle

# Persist the feature table to 'tsfresh_features.p'. The context manager
# guarantees the file handle is flushed and closed, which the bare open()
# call did not.
with open('tsfresh_features.p', 'wb') as fh:
    pickle.dump(train, fh)