# Data Pre-Processing to Create a Dataset

## Import Modules

In [1]:
import pandas as pd

import matplotlib.dates as mdates

## Load Data

In [2]:
# Room identifier; it is used further down when building the file names
# that belong to this room.
x = 'E07'

#### Interval data (10 minutes)

In [3]:
# Read the CO2 log of room `x`. Folder and file name are built from `x` so the
# data that is loaded always belongs to the room named in the output files.
# NOTE(review): assumes every room uses the <room>/NatSciData_<room>CO2.csv
# layout seen for E07 - confirm before switching rooms.
raw_data_CO2 = pd.read_csv(rf"C:\Users\Anna-Lena\Documents\Masterarbeit\Datasets\2_Long-term monitoring data\{x}\NatSciData_{x}CO2.csv", parse_dates=True)
# Work on a copy so the frame as read from disk stays untouched.
data_CO2 = raw_data_CO2.copy()
# Timestamps are written day-first (dd.mm.yyyy HH:MM:SS); parse them with an
# explicit format and use them as the index.
data_CO2['DateTime'] = pd.to_datetime(data_CO2['DateTime'], format='%d.%m.%Y %H:%M:%S')
data_CO2 = data_CO2.set_index('DateTime')

In [4]:
# Read the ElL log of room `x`. Folder and file name are built from `x` so the
# data that is loaded always belongs to the room named in the output files.
# NOTE(review): assumes every room uses the <room>/NatSciData_<room>ElL.csv
# layout seen for E07 - confirm before switching rooms.
raw_data_ElL = pd.read_csv(rf"C:\Users\Anna-Lena\Documents\Masterarbeit\Datasets\2_Long-term monitoring data\{x}\NatSciData_{x}ElL.csv", parse_dates=True)
# Work on a copy so the frame as read from disk stays untouched.
data_ElL = raw_data_ElL.copy()
# Timestamps are written day-first (dd.mm.yyyy HH:MM:SS); parse them with an
# explicit format and use them as the index.
data_ElL['DateTime'] = pd.to_datetime(data_ElL['DateTime'], format='%d.%m.%Y %H:%M:%S')
data_ElL = data_ElL.set_index('DateTime')

In [5]:
# Read the ElPIT log of room `x`. Folder and file name are built from `x` so
# the data that is loaded always belongs to the room named in the output files.
# NOTE(review): assumes every room uses the <room>/NatSciData_<room>ElPIT.csv
# layout seen for E07 - confirm before switching rooms.
raw_data_ElPIT = pd.read_csv(rf"C:\Users\Anna-Lena\Documents\Masterarbeit\Datasets\2_Long-term monitoring data\{x}\NatSciData_{x}ElPIT.csv", parse_dates=True)
# Work on a copy so the frame as read from disk stays untouched.
data_ElPIT = raw_data_ElPIT.copy()
# Timestamps are written day-first (dd.mm.yyyy HH:MM:SS); parse them with an
# explicit format and use them as the index.
data_ElPIT['DateTime'] = pd.to_datetime(data_ElPIT['DateTime'], format='%d.%m.%Y %H:%M:%S')
data_ElPIT = data_ElPIT.set_index('DateTime')

In [6]:
# Read the ElPo log of room `x`. Folder and file name are built from `x` so
# the data that is loaded always belongs to the room named in the output files.
# NOTE(review): assumes every room uses the <room>/NatSciData_<room>ElPo.csv
# layout seen for E07 - confirm before switching rooms.
raw_data_ElPo = pd.read_csv(rf"C:\Users\Anna-Lena\Documents\Masterarbeit\Datasets\2_Long-term monitoring data\{x}\NatSciData_{x}ElPo.csv", parse_dates=True)
# Work on a copy so the frame as read from disk stays untouched.
data_ElPo = raw_data_ElPo.copy()
# Timestamps are written day-first (dd.mm.yyyy HH:MM:SS); parse them with an
# explicit format and use them as the index.
data_ElPo['DateTime'] = pd.to_datetime(data_ElPo['DateTime'], format='%d.%m.%Y %H:%M:%S')
data_ElPo = data_ElPo.set_index('DateTime')

In [7]:
# Read the Tair log of room `x`. Folder and file name are built from `x` so
# the data that is loaded always belongs to the room named in the output files.
# NOTE(review): assumes every room uses the <room>/NatSciData_<room>Tair.csv
# layout seen for E07 - confirm before switching rooms.
raw_data_Tair = pd.read_csv(rf"C:\Users\Anna-Lena\Documents\Masterarbeit\Datasets\2_Long-term monitoring data\{x}\NatSciData_{x}Tair.csv", parse_dates=True)
# Work on a copy so the frame as read from disk stays untouched.
data_Tair = raw_data_Tair.copy()
# Timestamps are written day-first (dd.mm.yyyy HH:MM:SS); parse them with an
# explicit format and use them as the index.
data_Tair['DateTime'] = pd.to_datetime(data_Tair['DateTime'], format='%d.%m.%Y %H:%M:%S')
data_Tair = data_Tair.set_index('DateTime')

#### Event data (Change-of-Value-Logging)

In [8]:
# Read the occupancy event log of room `x`. Folder and file name are built
# from `x` so the data that is loaded always belongs to the room named in the
# output files.
# NOTE(review): assumes every room uses the <room>/NatSciData_<room>Occ.csv
# layout seen for E07 - confirm before switching rooms.
raw_data_Occ = pd.read_csv(rf"C:\Users\Anna-Lena\Documents\Masterarbeit\Datasets\2_Long-term monitoring data\{x}\NatSciData_{x}Occ.csv", parse_dates=True)
# Work on a copy so the frame as read from disk stays untouched.
data_Occ = raw_data_Occ.copy()
# Timestamps are written day-first (dd.mm.yyyy HH:MM:SS); parse them with an
# explicit format and use them as the index.
data_Occ['DateTime'] = pd.to_datetime(data_Occ['DateTime'], format='%d.%m.%Y %H:%M:%S')
data_Occ = data_Occ.set_index('DateTime')

In [9]:
# Read the sun protection event log of room `x`. Folder and file name are
# built from `x` so the data that is loaded always belongs to the room named in
# the output files.
# NOTE(review): assumes every room uses the <room>/NatSciData_<room>SP.csv
# layout seen for E07 - confirm before switching rooms.
raw_data_SP = pd.read_csv(rf"C:\Users\Anna-Lena\Documents\Masterarbeit\Datasets\2_Long-term monitoring data\{x}\NatSciData_{x}SP.csv", parse_dates=True)
# Work on a copy so the frame as read from disk stays untouched.
data_SP = raw_data_SP.copy()
# Timestamps are written day-first (dd.mm.yyyy HH:MM:SS); parse them with an
# explicit format and use them as the index.
data_SP['DateTime'] = pd.to_datetime(data_SP['DateTime'], format='%d.%m.%Y %H:%M:%S')
data_SP = data_SP.set_index('DateTime')

In [10]:
# Read the window state event log of room `x`. Folder and file name are built
# from `x` so the data that is loaded always belongs to the room named in the
# output files.
# NOTE(review): assumes every room uses the <room>/NatSciData_<room>W.csv
# layout seen for E07 - confirm before switching rooms.
raw_data_W = pd.read_csv(rf"C:\Users\Anna-Lena\Documents\Masterarbeit\Datasets\2_Long-term monitoring data\{x}\NatSciData_{x}W.csv", parse_dates=True)
# Work on a copy so the frame as read from disk stays untouched.
data_W = raw_data_W.copy()
# Timestamps are written day-first (dd.mm.yyyy HH:MM:SS); parse them with an
# explicit format and use them as the index.
data_W['DateTime'] = pd.to_datetime(data_W['DateTime'], format='%d.%m.%Y %H:%M:%S')
data_W = data_W.set_index('DateTime')

In [11]:
# Read the top light window event log of room `x`. Folder and file name are
# built from `x` so the data that is loaded always belongs to the room named in
# the output files.
# NOTE(review): assumes every room uses the <room>/NatSciData_<room>WT.csv
# layout seen for E07 - confirm before switching rooms.
raw_data_WT = pd.read_csv(rf"C:\Users\Anna-Lena\Documents\Masterarbeit\Datasets\2_Long-term monitoring data\{x}\NatSciData_{x}WT.csv", parse_dates=True)
# Work on a copy so the frame as read from disk stays untouched.
data_WT = raw_data_WT.copy()
# Timestamps are written day-first (dd.mm.yyyy HH:MM:SS); parse them with an
# explicit format and use them as the index.
data_WT['DateTime'] = pd.to_datetime(data_WT['DateTime'], format='%d.%m.%Y %H:%M:%S')
data_WT = data_WT.set_index('DateTime')

## Pre-Processing

#### Sort the data by DateTimeIndex

In [12]:
# Bring every series into chronological order. `DateTime` is the index of each
# frame, so the index is sorted directly. Mergesort is stable, which keeps rows
# that share a timestamp in the order they had in the source file.
data_CO2 = data_CO2.sort_index(kind='mergesort')
data_ElL = data_ElL.sort_index(kind='mergesort')
data_ElPIT = data_ElPIT.sort_index(kind='mergesort')
data_ElPo = data_ElPo.sort_index(kind='mergesort')
data_Tair = data_Tair.sort_index(kind='mergesort')
data_Occ = data_Occ.sort_index(kind='mergesort')
data_SP = data_SP.sort_index(kind='mergesort')
data_W = data_W.sort_index(kind='mergesort')
data_WT = data_WT.sort_index(kind='mergesort')

#### Select only the data of 2005 and 2006 and 2007-01-01

In [13]:
# Restrict every series to the study window. Both bounds are partial date
# strings, so the slice runs from the first timestamp in 2005 up to and
# including the last timestamp on 2007-01-01.
study_window = slice('2005', '2007-01-01')

data_CO2 = data_CO2.loc[study_window]
data_ElL = data_ElL.loc[study_window]
data_ElPIT = data_ElPIT.loc[study_window]
data_ElPo = data_ElPo.loc[study_window]
data_Tair = data_Tair.loc[study_window]
data_Occ = data_Occ.loc[study_window]
data_SP = data_SP.loc[study_window]
data_W = data_W.loc[study_window]
data_WT = data_WT.loc[study_window]

### Event data

#### Resampling event data

The event data is resampled to the same 10-minute interval as the interval data.

Using ffill() ("forward fill") method to fill the missing values with the last value until a new value is logged.

In [18]:
# Put the occupancy events on a 10-minute grid anchored at the Unix epoch and
# carry the last logged value forward onto each grid point.
data_Occ = data_Occ.resample('10min', origin='epoch').ffill()

In [19]:
# Put the window events on a 10-minute grid anchored at the Unix epoch and
# carry the last logged value forward onto each grid point.
data_W = data_W.resample('10min', origin='epoch').ffill()

In [20]:
# Put the top light window events on a 10-minute grid anchored at the Unix
# epoch and carry the last logged value forward onto each grid point.
data_WT = data_WT.resample('10min', origin='epoch').ffill()

The sun protection data is a special case: the first change of value is logged on 2005-03-03, but it has been confirmed that the value before that must be 0, because the sun protection in this office is not used during the winter months. Before resampling we therefore add a value of 0 at the start of 2005-01-01 and at the end of 2007-01-01, which keeps the data loss in the later merge small.

In [21]:
# Pin the sun protection state to 0 at both ends of the study window so the
# resampled series spans the whole period, then restore chronological order
# because the new rows are added at the end of the frame.
for boundary in ('2005-01-01 00:00:00', '2007-01-01 23:50:00'):
    data_SP.loc[pd.Timestamp(boundary)] = 0
data_SP = data_SP.sort_values(by=['DateTime'])

In [22]:
# Put the sun protection events on a 10-minute grid anchored at the Unix epoch
# and carry the last logged value forward onto each grid point.
data_SP = data_SP.resample('10min', origin='epoch').ffill()

#### Dropping the remaining NaN values in each data series

##### Occupancy

In [24]:
# Count the missing values that remain per column after the forward fill
data_Occ.isna().sum()

E07Occ    1
dtype: int64

In [25]:
# Show the first five rows to see where the missing value sits
# (the output lists it in the very first row).
data_Occ.head(5)

Unnamed: 0_level_0,E07Occ
DateTime,Unnamed: 1_level_1
2005-01-03 08:10:00,
2005-01-03 08:20:00,1.0
2005-01-03 08:30:00,1.0
2005-01-03 08:40:00,1.0
2005-01-03 08:50:00,1.0


In [26]:
# Remove the rows that are still NaN after the forward fill (here: the first
# grid point, which lies before the first logged event). Dropping by value
# instead of by a fixed timestamp keeps the cell re-runnable and independent of
# the room that was loaded.
data_Occ = data_Occ.dropna()

In [27]:
# Re-count the missing values; the expected result is zero
data_Occ.isna().sum()

E07Occ    0
dtype: int64

##### State of the window

In [28]:
# Count the missing values that remain per column after the forward fill
data_W.isna().sum()

E07W    1
dtype: int64

In [29]:
# Show the first five rows to see where the missing value sits
# (the output lists it in the very first row).
data_W.head(5)

Unnamed: 0_level_0,E07W
DateTime,Unnamed: 1_level_1
2005-01-03 08:10:00,
2005-01-03 08:20:00,1.0
2005-01-03 08:30:00,1.0
2005-01-03 08:40:00,0.0
2005-01-03 08:50:00,1.0


In [30]:
# Remove the rows that are still NaN after the forward fill (here: the first
# grid point, which lies before the first logged event). Dropping by value
# instead of by a fixed timestamp keeps the cell re-runnable and independent of
# the room that was loaded.
data_W = data_W.dropna()

In [31]:
# Re-count the missing values; the expected result is zero
data_W.isna().sum()

E07W    0
dtype: int64

##### State of the top light window

In [32]:
# Count the missing values that remain per column after the forward fill
data_WT.isna().sum()

E07WT    1
dtype: int64

In [33]:
# Show the first five rows to see where the missing value sits
# (the output lists it in the very first row).
data_WT.head(5)

Unnamed: 0_level_0,E07WT
DateTime,Unnamed: 1_level_1
2005-01-01 05:00:00,
2005-01-01 05:10:00,0.0
2005-01-01 05:20:00,0.0
2005-01-01 05:30:00,0.0
2005-01-01 05:40:00,0.0


In [34]:
# Remove the rows that are still NaN after the forward fill (here: the first
# grid point, which lies before the first logged event). Dropping by value
# instead of by a fixed timestamp keeps the cell re-runnable and independent of
# the room that was loaded.
data_WT = data_WT.dropna()

In [35]:
# Re-count the missing values; the expected result is zero
data_WT.isna().sum()

E07WT    0
dtype: int64

##### Sun Protection

In [36]:
# Count the missing values per column; none are expected here because a
# starting value was added before resampling
data_SP.isna().sum()

E07SP    0
dtype: int64

#### Merge event data into one dataframe

In [38]:
# Combine the four resampled event series into one frame. Each step is an
# inner join on the DateTime index, so only timestamps present in every series
# are kept.
from functools import reduce

frames = [data_Occ, data_SP, data_W, data_WT]
data_event = reduce(
    lambda merged, nxt: merged.merge(nxt, on='DateTime', how='inner'),
    frames,
)

In [39]:
# Display the merged event frame (the notebook truncates long frames)
data_event

Unnamed: 0_level_0,E07Occ,E07SP,E07W,E07WT
DateTime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2005-01-03 08:20:00,1.0,0,1.0,0.0
2005-01-03 08:30:00,1.0,0,1.0,0.0
2005-01-03 08:40:00,1.0,0,0.0,0.0
2005-01-03 08:50:00,1.0,0,1.0,0.0
2005-01-03 09:00:00,1.0,0,0.0,0.0
...,...,...,...,...
2006-12-29 08:20:00,1.0,0,0.0,0.0
2006-12-29 08:30:00,1.0,0,0.0,0.0
2006-12-29 08:40:00,1.0,0,0.0,0.0
2006-12-29 08:50:00,1.0,0,0.0,0.0


In [40]:
# First five rows: shows where the merged event data starts
data_event.head(5)

Unnamed: 0_level_0,E07Occ,E07SP,E07W,E07WT
DateTime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2005-01-03 08:20:00,1.0,0,1.0,0.0
2005-01-03 08:30:00,1.0,0,1.0,0.0
2005-01-03 08:40:00,1.0,0,0.0,0.0
2005-01-03 08:50:00,1.0,0,1.0,0.0
2005-01-03 09:00:00,1.0,0,0.0,0.0


In [41]:
# Last five rows: shows where the merged event data ends
data_event.tail(5)

Unnamed: 0_level_0,E07Occ,E07SP,E07W,E07WT
DateTime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2006-12-29 08:20:00,1.0,0,0.0,0.0
2006-12-29 08:30:00,1.0,0,0.0,0.0
2006-12-29 08:40:00,1.0,0,0.0,0.0
2006-12-29 08:50:00,1.0,0,0.0,0.0
2006-12-29 09:00:00,1.0,0,0.0,0.0


In [42]:
# (number of rows, number of columns) of the merged event data
data_event.shape

(104405, 4)

In [43]:
# Save the merged event data as comma-separated text; the DateTime index is
# written as the first column.
data_event.to_csv('data_' + x + '_event.txt', sep=',', index=True)

In [44]:
# Same content again, this time with a .csv file extension
data_event.to_csv('data_' + x + '_event.csv', sep=',', index=True)

### Interval data

#### Merge interval data into one dataframe

In [45]:
# Combine the five interval series into one frame. Each step is an inner join
# on the DateTime index, so only timestamps present in every series are kept.
frames = [data_CO2, data_ElL, data_ElPIT, data_ElPo, data_Tair]
data_interval = reduce(
    lambda merged, nxt: merged.merge(nxt, on='DateTime', how='inner'),
    frames,
)

In [46]:
# Display the merged interval frame (the notebook truncates long frames)
data_interval

Unnamed: 0_level_0,E07CO2,E07ElL,E07ElPIT,E07ElPo,E07Tair
DateTime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2005-01-01 00:00:00,461.750,309.9,152,740.3,20.4
2005-01-01 00:10:00,461.750,309.9,152,740.3,20.8
2005-01-01 00:20:00,473.195,309.9,152,740.3,20.8
2005-01-01 00:30:00,461.750,309.9,152,740.3,20.8
2005-01-01 00:40:00,461.000,309.9,152,740.3,20.4
...,...,...,...,...,...
2007-01-01 23:00:00,424.500,481.2,152,1461.4,19.8
2007-01-01 23:10:00,425.253,481.2,152,1461.4,19.8
2007-01-01 23:20:00,425.249,481.2,152,1461.4,19.8
2007-01-01 23:30:00,425.250,481.2,152,1461.4,19.8


In [47]:
# First five rows: shows where the merged interval data starts
data_interval.head(5)

Unnamed: 0_level_0,E07CO2,E07ElL,E07ElPIT,E07ElPo,E07Tair
DateTime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2005-01-01 00:00:00,461.75,309.9,152,740.3,20.4
2005-01-01 00:10:00,461.75,309.9,152,740.3,20.8
2005-01-01 00:20:00,473.195,309.9,152,740.3,20.8
2005-01-01 00:30:00,461.75,309.9,152,740.3,20.8
2005-01-01 00:40:00,461.0,309.9,152,740.3,20.4


In [48]:
# Last five rows: shows where the merged interval data ends
data_interval.tail(5)

Unnamed: 0_level_0,E07CO2,E07ElL,E07ElPIT,E07ElPo,E07Tair
DateTime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2007-01-01 23:00:00,424.5,481.2,152,1461.4,19.8
2007-01-01 23:10:00,425.253,481.2,152,1461.4,19.8
2007-01-01 23:20:00,425.249,481.2,152,1461.4,19.8
2007-01-01 23:30:00,425.25,481.2,152,1461.4,19.8
2007-01-01 23:40:00,425.555,481.2,152,1461.4,19.8


In [49]:
# (number of rows, number of columns) of the merged interval data
data_interval.shape

(98521, 5)

In [50]:
# Count the missing values per column of the merged interval data
data_interval.isna().sum()

E07CO2      0
E07ElL      0
E07ElPIT    0
E07ElPo     0
E07Tair     0
dtype: int64

In [51]:
# Save the merged interval data as comma-separated text; the DateTime index is
# written as the first column.
data_interval.to_csv('data_' + x + '_interval.txt', sep=',', index=True)

In [52]:
# Same content again, this time with a .csv file extension
data_interval.to_csv('data_' + x + '_interval.csv', sep=',', index=True)

### Create Dataset

#### Merge interval and event data together

In [53]:
# Inner join on the DateTime index: only timestamps present in both the event
# and the interval data are kept, so the resulting time series is not
# continuous wherever interval readings are missing.
frames = [data_event, data_interval]
data = reduce(
    lambda merged, nxt: merged.merge(nxt, on='DateTime', how='inner'),
    frames,
)

In [54]:
# Display the combined dataset (the notebook truncates long frames)
data

Unnamed: 0_level_0,E07Occ,E07SP,E07W,E07WT,E07CO2,E07ElL,E07ElPIT,E07ElPo,E07Tair
DateTime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2005-01-03 08:20:00,1.0,0,1.0,0.0,675.679,309.9,152,744.0,20.2
2005-01-03 08:30:00,1.0,0,1.0,0.0,634.949,310.0,152,744.0,19.2
2005-01-03 08:40:00,1.0,0,0.0,0.0,606.654,310.0,152,744.0,19.4
2005-01-03 08:50:00,1.0,0,1.0,0.0,585.756,310.0,152,744.0,20.2
2005-01-03 09:00:00,1.0,0,0.0,0.0,572.781,310.1,152,744.1,19.4
...,...,...,...,...,...,...,...,...,...
2006-12-29 08:20:00,1.0,0,0.0,0.0,485.750,480.2,152,1460.7,19.2
2006-12-29 08:30:00,1.0,0,0.0,0.0,486.500,480.2,152,1460.8,19.8
2006-12-29 08:40:00,1.0,0,0.0,0.0,497.500,480.2,152,1460.8,20.1
2006-12-29 08:50:00,1.0,0,0.0,0.0,498.250,480.3,152,1460.8,20.1


In [55]:
# Count the missing values per column of the combined dataset
data.isna().sum()

E07Occ      0
E07SP       0
E07W        0
E07WT       0
E07CO2      0
E07ElL      0
E07ElPIT    0
E07ElPo     0
E07Tair     0
dtype: int64

In [56]:
# Derive calendar and clock features from the DateTime index and append them
# as columns: year, month, day, weekday name, hour, minute and second.
stamps = data.index
calendar_parts = {
    'Year': stamps.year,
    'Month': stamps.month,
    'Day': stamps.day,
    'Weekday Name': stamps.day_name(),
    'Hour': stamps.hour,
    'Minute': stamps.minute,
    'Second': stamps.second,
}
# The dict keeps insertion order, so the columns are appended in this order.
for column, values in calendar_parts.items():
    data[column] = values

In [57]:
# First ten rows of the dataset, including the new calendar columns
data.head(10)

Unnamed: 0_level_0,E07Occ,E07SP,E07W,E07WT,E07CO2,E07ElL,E07ElPIT,E07ElPo,E07Tair,Year,Month,Day,Weekday Name,Hour,Minute,Second
DateTime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
2005-01-03 08:20:00,1.0,0,1.0,0.0,675.679,309.9,152,744.0,20.2,2005,1,3,Monday,8,20,0
2005-01-03 08:30:00,1.0,0,1.0,0.0,634.949,310.0,152,744.0,19.2,2005,1,3,Monday,8,30,0
2005-01-03 08:40:00,1.0,0,0.0,0.0,606.654,310.0,152,744.0,19.4,2005,1,3,Monday,8,40,0
2005-01-03 08:50:00,1.0,0,1.0,0.0,585.756,310.0,152,744.0,20.2,2005,1,3,Monday,8,50,0
2005-01-03 09:00:00,1.0,0,0.0,0.0,572.781,310.1,152,744.1,19.4,2005,1,3,Monday,9,0,0
2005-01-03 09:10:00,1.0,0,0.0,0.0,558.75,310.1,152,744.1,20.2,2005,1,3,Monday,9,10,0
2005-01-03 09:20:00,1.0,0,0.0,0.0,558.75,310.1,152,744.1,20.8,2005,1,3,Monday,9,20,0
2005-01-03 09:30:00,1.0,0,0.0,0.0,632.35,310.1,152,744.1,21.1,2005,1,3,Monday,9,30,0
2005-01-03 09:40:00,1.0,0,0.0,0.0,595.251,310.2,152,744.1,21.1,2005,1,3,Monday,9,40,0
2005-01-03 09:50:00,1.0,0,1.0,0.0,583.0,310.2,152,744.1,21.1,2005,1,3,Monday,9,50,0


In [58]:
# Last ten rows of the dataset, including the new calendar columns
data.tail(10)

Unnamed: 0_level_0,E07Occ,E07SP,E07W,E07WT,E07CO2,E07ElL,E07ElPIT,E07ElPo,E07Tair,Year,Month,Day,Weekday Name,Hour,Minute,Second
DateTime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
2006-12-29 07:30:00,0.0,0,0.0,0.0,486.5,480.1,152,1460.7,18.8,2006,12,29,Friday,7,30,0
2006-12-29 07:40:00,0.0,0,0.0,0.0,486.5,480.1,152,1460.7,18.8,2006,12,29,Friday,7,40,0
2006-12-29 07:50:00,0.0,0,0.0,0.0,485.787,480.1,152,1460.7,18.8,2006,12,29,Friday,7,50,0
2006-12-29 08:00:00,0.0,0,0.0,0.0,485.25,480.1,152,1460.7,18.8,2006,12,29,Friday,8,0,0
2006-12-29 08:10:00,0.0,0,0.0,0.0,485.75,480.1,152,1460.7,18.8,2006,12,29,Friday,8,10,0
2006-12-29 08:20:00,1.0,0,0.0,0.0,485.75,480.2,152,1460.7,19.2,2006,12,29,Friday,8,20,0
2006-12-29 08:30:00,1.0,0,0.0,0.0,486.5,480.2,152,1460.8,19.8,2006,12,29,Friday,8,30,0
2006-12-29 08:40:00,1.0,0,0.0,0.0,497.5,480.2,152,1460.8,20.1,2006,12,29,Friday,8,40,0
2006-12-29 08:50:00,1.0,0,0.0,0.0,498.25,480.3,152,1460.8,20.1,2006,12,29,Friday,8,50,0
2006-12-29 09:00:00,1.0,0,0.0,0.0,497.5,480.3,152,1460.8,19.4,2006,12,29,Friday,9,0,0


In [59]:
# Five randomly drawn rows; no seed is passed, so the rows differ between runs
data.sample(5)

Unnamed: 0_level_0,E07Occ,E07SP,E07W,E07WT,E07CO2,E07ElL,E07ElPIT,E07ElPo,E07Tair,Year,Month,Day,Weekday Name,Hour,Minute,Second
DateTime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
2005-10-21 12:40:00,0.0,0,0.0,0.0,583.0,365.6,152,1051.7,22.8,2005,10,21,Friday,12,40,0
2006-01-03 10:50:00,1.0,0,0.0,0.0,534.0,398.1,152,1128.8,21.1,2006,1,3,Tuesday,10,50,0
2005-03-13 05:50:00,0.0,0,0.0,0.0,450.0,344.8,152,819.8,19.8,2005,3,13,Sunday,5,50,0
2006-04-11 14:40:00,1.0,0,0.0,0.0,618.623,422.2,152,1222.7,21.8,2006,4,11,Tuesday,14,40,0
2006-03-24 07:10:00,0.0,0,0.0,0.0,485.25,421.3,152,1213.3,21.1,2006,3,24,Friday,7,10,0


In [60]:
# (number of rows, number of columns) of the final dataset
data.shape

(97666, 16)

In [61]:
# Data type of every column in the final dataset
data.dtypes

E07Occ          float64
E07SP             int64
E07W            float64
E07WT           float64
E07CO2          float64
E07ElL          float64
E07ElPIT          int64
E07ElPo         float64
E07Tair         float64
Year              int64
Month             int64
Day               int64
Weekday Name     object
Hour              int64
Minute            int64
Second            int64
dtype: object

In [62]:
# Display a random sample of 5 rows; the fixed random_state makes the
# selection reproducible between runs.
data.sample(5, random_state=5)

Unnamed: 0_level_0,E07Occ,E07SP,E07W,E07WT,E07CO2,E07ElL,E07ElPIT,E07ElPo,E07Tair,Year,Month,Day,Weekday Name,Hour,Minute,Second
DateTime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
2005-08-25 13:40:00,1.0,0,1.0,0.0,423.75,355.6,152,991.4,23.4,2005,8,25,Thursday,13,40,0
2006-11-04 08:00:00,0.0,0,0.0,0.0,461.0,441.8,152,1424.6,20.1,2006,11,4,Saturday,8,0,0
2006-09-27 15:10:00,1.0,0,1.0,0.0,595.25,432.4,152,1395.9,22.4,2006,9,27,Wednesday,15,10,0
2006-12-18 23:40:00,0.0,0,0.0,0.0,473.5,473.6,152,1456.0,20.3,2006,12,18,Monday,23,40,0
2005-05-22 05:00:00,0.0,0,0.0,0.0,472.75,351.7,152,889.1,21.6,2005,5,22,Sunday,5,0,0


In [63]:
# Save the final dataset as comma-separated text; the DateTime index is
# written as the first column.
data.to_csv('data_' + x + '.txt', sep=',', index=True)

In [64]:
# Same content again, this time with a .csv file extension
data.to_csv('data_' + x + '.csv', sep=',', index=True)