# Lab4 
## Структури для роботи з великими обсягами даних в Python
 **Мета**: отримати навички роботи із структурами для зберігання в Python (python, numpy, pandas, numpy array, dataframe, timeit) <br>
**Основні поняття**: numpy масиви, кортежі, списки, фрейми, профілювання.

In [1]:
import pandas as pd
import numpy as np
import timeit

### Таймер для підрахунку часу роботи функції

In [2]:
def timer(function, number=1, repeat=1):
    """Time ``function`` with ``timeit.repeat`` and report the mean per-call time.

    Args:
        function: Zero-argument callable (or statement string) accepted by
            ``timeit.repeat``.
        number: How many times ``function`` is executed inside each run.
        repeat: How many independent runs are performed.

    Returns:
        The average duration of a single execution, in seconds, so that
        callers writing ``t = timer(...)`` receive a usable value.

    Raises:
        ValueError: If ``number`` or ``repeat`` is smaller than 1.
    """
    if number < 1 or repeat < 1:
        raise ValueError("number and repeat must both be at least 1")
    # Each entry is the total time of `number` executions within one run.
    execution_times = timeit.repeat(function, number=number, repeat=repeat)
    # Divide by `number` as well, so the figure is per call rather than per run.
    average_execution_time = sum(execution_times) / (len(execution_times) * number)
    print(f"Average execution time: {average_execution_time:.6f} seconds")
    return average_execution_time


## Reading and cleaning

### Reading with pandas

In [3]:
def pandas_read_file():
    """Load ``household_power_consumption.txt`` into a pandas DataFrame.

    The file is parsed as ';'-separated text whose first line is the header;
    every '?' field is read as a missing value (NaN).

    Returns:
        The parsed DataFrame with a default integer index.
    """
    read_options = {
        "index_col": None,
        "header": 0,
        "sep": ";",
        "low_memory": False,
        "na_values": '?',
    }
    return pd.read_csv('household_power_consumption.txt', **read_options)
pandas_read_file()

Unnamed: 0,Date,Time,Global_active_power,Global_reactive_power,Voltage,Global_intensity,Sub_metering_1,Sub_metering_2,Sub_metering_3
0,16/12/2006,17:24:00,4.216,0.418,234.84,18.4,0.0,1.0,17.0
1,16/12/2006,17:25:00,5.360,0.436,233.63,23.0,0.0,1.0,16.0
2,16/12/2006,17:26:00,5.374,0.498,233.29,23.0,0.0,2.0,17.0
3,16/12/2006,17:27:00,5.388,0.502,233.74,23.0,0.0,1.0,17.0
4,16/12/2006,17:28:00,3.666,0.528,235.68,15.8,0.0,1.0,17.0
...,...,...,...,...,...,...,...,...,...
2075254,26/11/2010,20:58:00,0.946,0.000,240.43,4.0,0.0,0.0,0.0
2075255,26/11/2010,20:59:00,0.944,0.000,240.00,4.0,0.0,0.0,0.0
2075256,26/11/2010,21:00:00,0.938,0.000,239.82,3.8,0.0,0.0,0.0
2075257,26/11/2010,21:01:00,0.934,0.000,239.70,3.8,0.0,0.0,0.0


In [4]:
timer(pandas_read_file)

Average execution time: 1.914097 seconds


In [5]:
df = pandas_read_file()
df

Unnamed: 0,Date,Time,Global_active_power,Global_reactive_power,Voltage,Global_intensity,Sub_metering_1,Sub_metering_2,Sub_metering_3
0,16/12/2006,17:24:00,4.216,0.418,234.84,18.4,0.0,1.0,17.0
1,16/12/2006,17:25:00,5.360,0.436,233.63,23.0,0.0,1.0,16.0
2,16/12/2006,17:26:00,5.374,0.498,233.29,23.0,0.0,2.0,17.0
3,16/12/2006,17:27:00,5.388,0.502,233.74,23.0,0.0,1.0,17.0
4,16/12/2006,17:28:00,3.666,0.528,235.68,15.8,0.0,1.0,17.0
...,...,...,...,...,...,...,...,...,...
2075254,26/11/2010,20:58:00,0.946,0.000,240.43,4.0,0.0,0.0,0.0
2075255,26/11/2010,20:59:00,0.944,0.000,240.00,4.0,0.0,0.0,0.0
2075256,26/11/2010,21:00:00,0.938,0.000,239.82,3.8,0.0,0.0,0.0
2075257,26/11/2010,21:01:00,0.934,0.000,239.70,3.8,0.0,0.0,0.0


### Cleaning pandas

In [6]:
df.isnull().values.any()

True

In [7]:
df.isnull().sum()

Date                         0
Time                         0
Global_active_power      25979
Global_reactive_power    25979
Voltage                  25979
Global_intensity         25979
Sub_metering_1           25979
Sub_metering_2           25979
Sub_metering_3           25979
dtype: int64

In [8]:
# Back-fill every missing reading with the next valid value in its column.
# `DataFrame.bfill()` replaces the deprecated `fillna(method='bfill')` spelling.
df = df.bfill()

In [9]:
df.isnull().values.any()

False

In [10]:
# Parse the day/month/year strings into datetime values.
df['Date'] = pd.to_datetime(df['Date'], format = "%d/%m/%Y")

# Keep only the time-of-day part, stored as datetime.time objects.
df['Time'] = pd.to_datetime(df['Time'], format='%H:%M:%S').dt.time

# Cast the seven measurement columns (positions 2..8) to float.
columns_to_convert = df.columns[2:9]
columns_to_convert_dict = {i: float for i in columns_to_convert}
df = df.astype(columns_to_convert_dict)

### Reading and cleaning with numpy

From dataframe (already cleaned)

In [11]:
def numpy_read_file_1():
    """Expose the already cleaned module-level ``df`` as a 2-D NumPy array.

    Returns:
        ``df.values``; the notebook output shows this array has ``dtype=object``.
    """
    return df.values
arr = numpy_read_file_1()
arr

array([[Timestamp('2006-12-16 00:00:00'), datetime.time(17, 24), 4.216,
        ..., 0.0, 1.0, 17.0],
       [Timestamp('2006-12-16 00:00:00'), datetime.time(17, 25), 5.36,
        ..., 0.0, 1.0, 16.0],
       [Timestamp('2006-12-16 00:00:00'), datetime.time(17, 26), 5.374,
        ..., 0.0, 2.0, 17.0],
       ...,
       [Timestamp('2010-11-26 00:00:00'), datetime.time(21, 0), 0.938,
        ..., 0.0, 0.0, 0.0],
       [Timestamp('2010-11-26 00:00:00'), datetime.time(21, 1), 0.934,
        ..., 0.0, 0.0, 0.0],
       [Timestamp('2010-11-26 00:00:00'), datetime.time(21, 2), 0.932,
        ..., 0.0, 0.0, 0.0]], dtype=object)

Directly with numpy

In [12]:
def numpy_read_file_2():
    """Read ``household_power_consumption.txt`` straight into a structured array.

    The header line is skipped. ``Date`` and ``Time`` are kept as short
    unicode strings and the seven measurements are stored as 64-bit floats.
    Fields equal to '?' are treated as missing and replaced by 0. Note that
    the pandas cell of this notebook back-fills missing values instead, so
    the two datasets differ on those rows.

    Returns:
        A 1-D structured ``numpy.ndarray`` with one record per data line.
    """
    measurement_names = (
        'Global_active_power',
        'Global_reactive_power',
        'Voltage',
        'Global_intensity',
        'Sub_metering_1',
        'Sub_metering_2',
        'Sub_metering_3',
    )
    record_layout = [('Date', 'U10'), ('Time', 'U8')]
    record_layout += [(name, 'f8') for name in measurement_names]
    return np.genfromtxt(
        'household_power_consumption.txt',
        delimiter=';',
        skip_header=1,
        dtype=record_layout,
        missing_values='?',
        filling_values=0,
    )
arr2 = numpy_read_file_2()
# Show the array that was just loaded instead of parsing the whole file a
# second time only to display it.
arr2

array([('16/12/2006', '17:24:00', 4.216, 0.418, 234.84, 18.4, 0., 1., 17.),
       ('16/12/2006', '17:25:00', 5.36 , 0.436, 233.63, 23. , 0., 1., 16.),
       ('16/12/2006', '17:26:00', 5.374, 0.498, 233.29, 23. , 0., 2., 17.),
       ...,
       ('26/11/2010', '21:00:00', 0.938, 0.   , 239.82,  3.8, 0., 0.,  0.),
       ('26/11/2010', '21:01:00', 0.934, 0.   , 239.7 ,  3.8, 0., 0.,  0.),
       ('26/11/2010', '21:02:00', 0.932, 0.   , 239.55,  3.8, 0., 0.,  0.)],
      dtype=[('Date', '<U10'), ('Time', '<U8'), ('Global_active_power', '<f8'), ('Global_reactive_power', '<f8'), ('Voltage', '<f8'), ('Global_intensity', '<f8'), ('Sub_metering_1', '<f8'), ('Sub_metering_2', '<f8'), ('Sub_metering_3', '<f8')])

In [13]:
t1 = timer(pandas_read_file)
t2 = timer(numpy_read_file_1)
t3 = timer(numpy_read_file_2)

Average execution time: 1.532044 seconds
Average execution time: 1.808997 seconds
Average execution time: 11.065847 seconds


## Перший рівень

## 1. Обрати всі домогосподарства, у яких загальна активна споживана потужність перевищує 5 кВт.

In [14]:
def funct_1():
    """Select the rows of ``df`` whose ``Global_active_power`` exceeds 5 (kW).

    Returns:
        A DataFrame with only the matching rows of the module-level ``df``.
    """
    df_1 = df[df['Global_active_power'] > 5]
    return df_1
funct_1()

Unnamed: 0,Date,Time,Global_active_power,Global_reactive_power,Voltage,Global_intensity,Sub_metering_1,Sub_metering_2,Sub_metering_3
1,2006-12-16,17:25:00,5.360,0.436,233.63,23.0,0.0,1.0,16.0
2,2006-12-16,17:26:00,5.374,0.498,233.29,23.0,0.0,2.0,17.0
3,2006-12-16,17:27:00,5.388,0.502,233.74,23.0,0.0,1.0,17.0
11,2006-12-16,17:35:00,5.412,0.470,232.78,23.2,0.0,1.0,17.0
12,2006-12-16,17:36:00,5.224,0.478,232.99,22.4,0.0,1.0,16.0
...,...,...,...,...,...,...,...,...,...
2069356,2010-11-22,18:40:00,5.408,0.150,231.50,23.6,48.0,0.0,0.0
2069357,2010-11-22,18:41:00,5.528,0.144,232.48,24.6,53.0,0.0,0.0
2071586,2010-11-24,07:50:00,5.172,0.050,235.18,22.0,0.0,38.0,17.0
2071587,2010-11-24,07:51:00,5.750,0.000,234.40,24.6,0.0,39.0,17.0


In [15]:
def funct_1_np_tuples():
    """Filter the structured array ``arr2`` to records with active power above 5.

    Returns:
        The records of ``arr2`` whose ``Global_active_power`` field is > 5.0.
    """
    power_above_limit = arr2['Global_active_power'] > 5.0
    return arr2[power_above_limit]
funct_1_np_tuples()

array([('16/12/2006', '17:25:00', 5.36 , 0.436, 233.63, 23. , 0.,  1., 16.),
       ('16/12/2006', '17:26:00', 5.374, 0.498, 233.29, 23. , 0.,  2., 17.),
       ('16/12/2006', '17:27:00', 5.388, 0.502, 233.74, 23. , 0.,  1., 17.),
       ...,
       ('24/11/2010', '07:50:00', 5.172, 0.05 , 235.18, 22. , 0., 38., 17.),
       ('24/11/2010', '07:51:00', 5.75 , 0.   , 234.4 , 24.6, 0., 39., 17.),
       ('25/11/2010', '07:21:00', 5.074, 0.24 , 238.55, 21.4, 1.,  2., 18.)],
      dtype=[('Date', '<U10'), ('Time', '<U8'), ('Global_active_power', '<f8'), ('Global_reactive_power', '<f8'), ('Voltage', '<f8'), ('Global_intensity', '<f8'), ('Sub_metering_1', '<f8'), ('Sub_metering_2', '<f8'), ('Sub_metering_3', '<f8')])

In [16]:
def funct_1_np():
    """Filter the object array ``arr`` to rows with active power above 5.

    Column 2 of ``arr`` holds ``Global_active_power`` (same column order as
    the DataFrame the array was taken from).

    Returns:
        The rows of ``arr`` whose third column is > 5.0.
    """
    active_power = arr[:, 2]
    return arr[active_power > 5.0]
show = funct_1_np()
show[:,2]

array([5.36, 5.374, 5.388, ..., 5.172, 5.75, 5.074], dtype=object)

In [17]:
t1_1 = timer(funct_1, repeat = 5)
t2_1 = timer(funct_1_np_tuples, repeat = 5)
t3_1= timer(funct_1_np, repeat = 5)

Average execution time: 0.005362 seconds
Average execution time: 0.017410 seconds
Average execution time: 0.033057 seconds


## 2. Обрати всі домогосподарства, у яких вольтаж перевищує 235 В.

In [18]:
def funct_2():
    """Select the rows of ``df`` whose ``Voltage`` exceeds 235 (V).

    Returns:
        A DataFrame with only the matching rows of the module-level ``df``.
    """
    voltage_above_limit = df['Voltage'] > 235
    return df[voltage_above_limit]
funct_2()

Unnamed: 0,Date,Time,Global_active_power,Global_reactive_power,Voltage,Global_intensity,Sub_metering_1,Sub_metering_2,Sub_metering_3
4,2006-12-16,17:28:00,3.666,0.528,235.68,15.8,0.0,1.0,17.0
5,2006-12-16,17:29:00,3.520,0.522,235.02,15.0,0.0,2.0,17.0
6,2006-12-16,17:30:00,3.702,0.520,235.09,15.8,0.0,1.0,17.0
7,2006-12-16,17:31:00,3.700,0.520,235.22,15.8,0.0,1.0,17.0
14,2006-12-16,17:38:00,4.054,0.422,235.24,17.6,0.0,1.0,17.0
...,...,...,...,...,...,...,...,...,...
2075254,2010-11-26,20:58:00,0.946,0.000,240.43,4.0,0.0,0.0,0.0
2075255,2010-11-26,20:59:00,0.944,0.000,240.00,4.0,0.0,0.0,0.0
2075256,2010-11-26,21:00:00,0.938,0.000,239.82,3.8,0.0,0.0,0.0
2075257,2010-11-26,21:01:00,0.934,0.000,239.70,3.8,0.0,0.0,0.0


In [19]:
def funct_2_np_tuples():
    """Filter the structured array ``arr2`` to records with voltage above 235.

    Returns:
        The records of ``arr2`` whose ``Voltage`` field is > 235.
    """
    voltage_above_limit = arr2['Voltage'] > 235
    return arr2[voltage_above_limit]
funct_2_np_tuples()

array([('16/12/2006', '17:28:00', 3.666, 0.528, 235.68, 15.8, 0., 1., 17.),
       ('16/12/2006', '17:29:00', 3.52 , 0.522, 235.02, 15. , 0., 2., 17.),
       ('16/12/2006', '17:30:00', 3.702, 0.52 , 235.09, 15.8, 0., 1., 17.),
       ...,
       ('26/11/2010', '21:00:00', 0.938, 0.   , 239.82,  3.8, 0., 0.,  0.),
       ('26/11/2010', '21:01:00', 0.934, 0.   , 239.7 ,  3.8, 0., 0.,  0.),
       ('26/11/2010', '21:02:00', 0.932, 0.   , 239.55,  3.8, 0., 0.,  0.)],
      dtype=[('Date', '<U10'), ('Time', '<U8'), ('Global_active_power', '<f8'), ('Global_reactive_power', '<f8'), ('Voltage', '<f8'), ('Global_intensity', '<f8'), ('Sub_metering_1', '<f8'), ('Sub_metering_2', '<f8'), ('Sub_metering_3', '<f8')])

In [20]:
def funct_2_np():
    """Filter the object array ``arr`` to rows with voltage above 235.

    Column 4 of ``arr`` holds ``Voltage`` (same column order as the
    DataFrame the array was taken from).

    Returns:
        The rows of ``arr`` whose fifth column is > 235.
    """
    voltage = arr[:, 4]
    return arr[voltage > 235]
show = funct_2_np()
show[:,4]

array([235.68, 235.02, 235.09, ..., 239.82, 239.7, 239.55], dtype=object)

In [21]:
t1_2 = timer(funct_2, repeat=5)
t2_2 = timer(funct_2_np_tuples, repeat = 5)
t3_2 = timer(funct_2_np, repeat = 5)

Average execution time: 0.111973 seconds
Average execution time: 0.178980 seconds
Average execution time: 0.342743 seconds


## 3. Обрати всі домогосподарства, у яких сила струму лежить в межах 19-20 А, для них виявити ті, у яких пральна машина та холодильник споживають більше, ніж бойлер та кондиціонер.

In [22]:
def funct_3():
    """Select rows with current in [19, 20] A where group 2 outdraws group 3.

    First keeps the rows of ``df`` whose ``Global_intensity`` lies between 19
    and 20 inclusive, then among them the rows where ``Sub_metering_2`` is
    strictly greater than ``Sub_metering_3``.

    Returns:
        A DataFrame with the rows that satisfy both conditions.
    """
    intensity_in_range = df['Global_intensity'].between(19, 20)
    in_range = df[intensity_in_range]
    group_2_dominates = in_range['Sub_metering_2'] > in_range['Sub_metering_3']
    return in_range[group_2_dominates]
funct_3()

Unnamed: 0,Date,Time,Global_active_power,Global_reactive_power,Voltage,Global_intensity,Sub_metering_1,Sub_metering_2,Sub_metering_3
45,2006-12-16,18:09:00,4.464,0.136,234.66,19.0,0.0,37.0,16.0
460,2006-12-17,01:04:00,4.582,0.258,238.08,19.6,0.0,13.0,0.0
464,2006-12-17,01:08:00,4.618,0.104,239.61,19.6,0.0,27.0,0.0
475,2006-12-17,01:19:00,4.636,0.140,237.37,19.4,0.0,36.0,0.0
476,2006-12-17,01:20:00,4.634,0.152,237.17,19.4,0.0,35.0,0.0
...,...,...,...,...,...,...,...,...,...
2071589,2010-11-24,07:53:00,4.666,0.000,235.72,19.8,0.0,39.0,17.0
2071590,2010-11-24,07:54:00,4.694,0.000,236.78,19.8,0.0,39.0,18.0
2071591,2010-11-24,07:55:00,4.602,0.000,237.08,19.4,0.0,40.0,17.0
2071592,2010-11-24,07:56:00,4.536,0.000,237.03,19.0,0.0,39.0,17.0


In [23]:
def funct_3_np():
    """Structured-array counterpart of the current / sub-metering filter.

    Keeps the records of ``arr2`` whose ``Global_intensity`` lies between 19
    and 20 inclusive, then among them the records where ``Sub_metering_2``
    is strictly greater than ``Sub_metering_3``.

    Returns:
        The records of ``arr2`` that satisfy both conditions.
    """
    current = arr2['Global_intensity']
    candidates = arr2[(current >= 19) & (current <= 20)]
    group_2_dominates = candidates['Sub_metering_2'] > candidates['Sub_metering_3']
    return candidates[group_2_dominates]
funct_3_np()

array([('16/12/2006', '18:09:00', 4.464, 0.136, 234.66, 19. , 0., 37., 16.),
       ('17/12/2006', '01:04:00', 4.582, 0.258, 238.08, 19.6, 0., 13.,  0.),
       ('17/12/2006', '01:08:00', 4.618, 0.104, 239.61, 19.6, 0., 27.,  0.),
       ...,
       ('24/11/2010', '07:55:00', 4.602, 0.   , 237.08, 19.4, 0., 40., 17.),
       ('24/11/2010', '07:56:00', 4.536, 0.   , 237.03, 19. , 0., 39., 17.),
       ('24/11/2010', '07:57:00', 4.626, 0.   , 236.78, 19.4, 0., 39., 17.)],
      dtype=[('Date', '<U10'), ('Time', '<U8'), ('Global_active_power', '<f8'), ('Global_reactive_power', '<f8'), ('Voltage', '<f8'), ('Global_intensity', '<f8'), ('Sub_metering_1', '<f8'), ('Sub_metering_2', '<f8'), ('Sub_metering_3', '<f8')])

In [24]:
timer(funct_3)
timer(funct_3_np)

Average execution time: 0.009650 seconds
Average execution time: 0.033651 seconds


## 4. Обрати випадковим чином 500000 домогосподарств (без повторів елементів вибірки), для них обчислити середні величини усіх 3-х груп споживання електричної енергії, а також

In [25]:
def funct_4():
    """Draw 500000 random rows of ``df`` and add their mean sub-metering value.

    ``DataFrame.sample`` is called with its default settings, so rows are
    drawn without replacement. The added column holds, for every sampled row,
    the arithmetic mean of its three ``Sub_metering_*`` values.

    Returns:
        The sampled DataFrame with the extra
        'Avarage consumption of electrical energy' column.
    """
    sampled = df.sample(500000)
    metering_total = sampled['Sub_metering_1'] + sampled['Sub_metering_2'] + sampled['Sub_metering_3']
    sampled['Avarage consumption of electrical energy'] = metering_total / 3
    return sampled
funct_4()

Unnamed: 0,Date,Time,Global_active_power,Global_reactive_power,Voltage,Global_intensity,Sub_metering_1,Sub_metering_2,Sub_metering_3,Avarage consumption of electrical energy
192369,2007-04-29,07:33:00,0.232,0.078,234.57,1.0,0.0,0.0,0.0,0.000000
1394372,2009-08-11,00:56:00,1.030,0.394,240.15,4.6,0.0,0.0,12.0,4.000000
1480962,2009-10-10,04:06:00,2.012,0.186,240.73,8.4,0.0,0.0,29.0,9.666667
1631059,2010-01-22,09:43:00,1.846,0.000,242.61,7.6,0.0,0.0,19.0,6.333333
640498,2008-03-05,12:22:00,0.226,0.000,244.39,1.0,0.0,0.0,0.0,0.000000
...,...,...,...,...,...,...,...,...,...,...
247296,2007-06-06,11:00:00,1.444,0.154,229.66,6.2,0.0,0.0,16.0,5.333333
1200550,2009-03-29,10:34:00,1.560,0.000,244.80,6.4,0.0,0.0,18.0,6.000000
1296315,2009-06-03,22:39:00,0.446,0.132,242.18,1.8,0.0,1.0,1.0,0.666667
468225,2007-11-06,21:09:00,0.678,0.000,239.48,2.8,0.0,0.0,0.0,0.000000


In [26]:
random_indices_1 = np.random.choice(len(arr2), 500000, replace=False)
def funct_4_np():
    """Draw 500000 distinct random records from the structured array ``arr2``.

    NOTE(review): unlike ``funct_4`` this function only samples; it does not
    compute any average of the sub-metering fields, so the two timings do
    not measure the same amount of work.

    Returns:
        The sampled records of ``arr2``, in the order the indices were drawn.
    """
    picked_rows = np.random.choice(len(arr2), 500000, replace=False)
    return arr2[picked_rows]
funct_4_np()

array([('25/1/2008', '19:09:00', 2.642, 0.08 , 236.08, 11.2, 0., 0.,  0.),
       ('8/6/2008', '11:48:00', 1.494, 0.306, 241.68,  6.2, 0., 1., 18.),
       ('21/11/2008', '07:56:00', 2.626, 0.   , 232.41, 11.2, 1., 0., 16.),
       ...,
       ('8/12/2007', '00:05:00', 0.536, 0.146, 242.85,  2.2, 0., 0.,  0.),
       ('14/5/2009', '00:14:00', 0.41 , 0.228, 243.98,  1.8, 0., 2.,  1.),
       ('19/6/2008', '14:09:00', 0.3  , 0.208, 237.47,  1.4, 0., 0.,  1.)],
      dtype=[('Date', '<U10'), ('Time', '<U8'), ('Global_active_power', '<f8'), ('Global_reactive_power', '<f8'), ('Voltage', '<f8'), ('Global_intensity', '<f8'), ('Sub_metering_1', '<f8'), ('Sub_metering_2', '<f8'), ('Sub_metering_3', '<f8')])

In [27]:
random_indices_1

array([1944739, 1754626, 1141668, ...,  119946,  116051, 1080360])

In [28]:
timer(funct_4)
timer(funct_4_np)

Average execution time: 0.225694 seconds
Average execution time: 0.171980 seconds


## 5. Обрати ті домогосподарства, які після 18-00 споживають понад 6 кВт за хвилину в середньому, серед відібраних визначити ті, у яких основне споживання електроенергії у вказаний проміжок часу припадає на пральну машину, сушарку, холодильник та освітлення (група 2 є найбільшою), а потім обрати кожен третій результат із першої половини та кожен четвертий результат із другої половини.

In [29]:
df

Unnamed: 0,Date,Time,Global_active_power,Global_reactive_power,Voltage,Global_intensity,Sub_metering_1,Sub_metering_2,Sub_metering_3
0,2006-12-16,17:24:00,4.216,0.418,234.84,18.4,0.0,1.0,17.0
1,2006-12-16,17:25:00,5.360,0.436,233.63,23.0,0.0,1.0,16.0
2,2006-12-16,17:26:00,5.374,0.498,233.29,23.0,0.0,2.0,17.0
3,2006-12-16,17:27:00,5.388,0.502,233.74,23.0,0.0,1.0,17.0
4,2006-12-16,17:28:00,3.666,0.528,235.68,15.8,0.0,1.0,17.0
...,...,...,...,...,...,...,...,...,...
2075254,2010-11-26,20:58:00,0.946,0.000,240.43,4.0,0.0,0.0,0.0
2075255,2010-11-26,20:59:00,0.944,0.000,240.00,4.0,0.0,0.0,0.0
2075256,2010-11-26,21:00:00,0.938,0.000,239.82,3.8,0.0,0.0,0.0
2075257,2010-11-26,21:01:00,0.934,0.000,239.70,3.8,0.0,0.0,0.0


In [30]:
def funct_5():
    """Pick evening high-consumption rows dominated by sub-metering group 2.

    Steps:
        1. Keep the rows of ``df`` whose ``Time`` is strictly after 18:00:00.
        2. Among them keep rows with ``Global_active_power`` > 6 where
           ``Sub_metering_2`` is strictly greater than both ``Sub_metering_1``
           and ``Sub_metering_3``.
        3. Take every third row of the first half and every fourth row of the
           second half of that selection (both starting at the first row of
           the respective half).

    Returns:
        A DataFrame with the rows from the first half followed by the rows
        from the second half.
    """
    evening_start = pd.to_datetime("18:00:00", format='%H:%M:%S').time()
    df_5 = df[df['Time'] > evening_start]
    df_51 = df_5[(df_5['Global_active_power']>6) & (df_5["Sub_metering_2"] > df_5["Sub_metering_1"])
                 & (df_5["Sub_metering_2"] > df_5["Sub_metering_3"])]
    midpoint = len(df_51) // 2
    df_53 = df_51.iloc[0:midpoint:3]
    df_54 = df_51.iloc[midpoint::4]
    concatenated_df = pd.concat([df_53, df_54])
    return concatenated_df
funct_5()


Unnamed: 0,Date,Time,Global_active_power,Global_reactive_power,Voltage,Global_intensity,Sub_metering_1,Sub_metering_2,Sub_metering_3
41,2006-12-16,18:05:00,6.052,0.192,232.93,26.2,0.0,37.0,17.0
44,2006-12-16,18:08:00,6.308,0.116,232.25,27.0,0.0,36.0,17.0
17494,2006-12-28,20:58:00,6.386,0.374,236.63,27.0,1.0,36.0,17.0
17498,2006-12-28,21:02:00,8.088,0.262,235.50,34.4,1.0,72.0,17.0
17501,2006-12-28,21:05:00,7.230,0.152,235.22,30.6,1.0,73.0,17.0
...,...,...,...,...,...,...,...,...,...
2066467,2010-11-20,18:31:00,6.830,0.394,229.39,30.2,24.0,34.0,16.0
2066471,2010-11-20,18:35:00,6.784,0.364,228.38,30.0,21.0,35.0,16.0
2066475,2010-11-20,18:39:00,6.362,0.372,229.83,28.0,16.0,35.0,16.0
2066479,2010-11-20,18:43:00,6.324,0.376,229.93,27.8,14.0,35.0,17.0


In [31]:
def funct_5_np():
    """Structured-array version of the evening high-consumption selection.

    Keeps the records of ``arr2`` whose ``Time`` string compares greater than
    '18:00:00', then those with ``Global_active_power`` > 6 where
    ``Sub_metering_2`` is strictly greater than both other sub-metering
    fields. From that selection it takes every third record of the first
    half and every fourth record of the second half.

    Returns:
        A structured array with the first-half picks followed by the
        second-half picks.
    """
    evening = arr2[arr2['Time'] > '18:00:00']
    group_2 = evening["Sub_metering_2"]
    selected = evening[(evening['Global_active_power'] > 6)
                       & (group_2 > evening["Sub_metering_1"])
                       & (group_2 > evening["Sub_metering_3"])]
    half = selected.shape[0] // 2
    first_part = selected[:half:3]
    second_part = selected[half::4]
    return np.concatenate((first_part, second_part), axis=0)
show_5 = funct_5_np()
show_5[:20]

array([('16/12/2006', '18:05:00', 6.052, 0.192, 232.93, 26.2,  0., 37., 17.),
       ('16/12/2006', '18:08:00', 6.308, 0.116, 232.25, 27. ,  0., 36., 17.),
       ('28/12/2006', '20:58:00', 6.386, 0.374, 236.63, 27. ,  1., 36., 17.),
       ('28/12/2006', '21:02:00', 8.088, 0.262, 235.5 , 34.4,  1., 72., 17.),
       ('28/12/2006', '21:05:00', 7.23 , 0.152, 235.22, 30.6,  1., 73., 17.),
       ('28/12/2006', '21:08:00', 7.352, 0.   , 235.45, 31.2,  1., 73., 17.),
       ('28/12/2006', '21:11:00', 9.048, 0.   , 231.48, 39. , 34., 71., 16.),
       ('28/12/2006', '21:14:00', 9.118, 0.108, 231.18, 39.4, 36., 72., 16.),
       ('28/12/2006', '21:17:00', 7.04 , 0.13 , 233.27, 30.2, 37., 38., 17.),
       ('29/12/2006', '21:16:00', 6.146, 0.116, 230.53, 26.6,  0., 70.,  0.),
       ('29/12/2006', '21:19:00', 6.184, 0.138, 231.57, 26.6,  0., 70.,  0.),
       ('29/12/2006', '21:22:00', 6.214, 0.12 , 230.53, 26.8,  0., 70.,  0.),
       ('29/12/2006', '21:25:00', 6.086, 0.   , 229.04, 26.6,  0

In [32]:
timer(funct_5)
timer(funct_5_np)

Average execution time: 0.112234 seconds
Average execution time: 0.061997 seconds
