# Missing Data

In [1]:
import pyspark
from pyspark.sql import SparkSession
spark = SparkSession.builder.appName('MissingData').getOrCreate()

cores = spark._jsc.sc().getExecutorMemoryStatus().keySet().size()
print("You are working with", cores, "cores(s)")
spark

You are working with 1 cores(s)


### DataSet
**Source:** https://www.kaggle.com/himanshupoddar/zomato-bangalore-restaurants

In [2]:
zomato = spark.read.csv('Datasets/zomato.csv', inferSchema=True, header=True)

In [3]:
print(zomato.printSchema())

root
 |-- url: string (nullable = true)
 |-- address: string (nullable = true)
 |-- name: string (nullable = true)
 |-- online_order: string (nullable = true)
 |-- book_table: string (nullable = true)
 |-- rate: string (nullable = true)
 |-- votes: string (nullable = true)
 |-- phone: string (nullable = true)
 |-- location: string (nullable = true)
 |-- rest_type: string (nullable = true)
 |-- dish_liked: string (nullable = true)
 |-- cuisines: string (nullable = true)
 |-- approx_cost(for two people): string (nullable = true)
 |-- reviews_list: string (nullable = true)
 |-- menu_item: string (nullable = true)
 |-- listed_in(type): string (nullable = true)
 |-- listed_in(city): string (nullable = true)

None


In [4]:
from pyspark.sql.functions import *
from pyspark.sql.types import *

df = zomato.withColumn("approx_cost(for two people)", zomato["approx_cost(for two people)"].cast(IntegerType())) \
            .withColumn("votes", zomato["votes"].cast(IntegerType()))
print(df.printSchema())

root
 |-- url: string (nullable = true)
 |-- address: string (nullable = true)
 |-- name: string (nullable = true)
 |-- online_order: string (nullable = true)
 |-- book_table: string (nullable = true)
 |-- rate: string (nullable = true)
 |-- votes: integer (nullable = true)
 |-- phone: string (nullable = true)
 |-- location: string (nullable = true)
 |-- rest_type: string (nullable = true)
 |-- dish_liked: string (nullable = true)
 |-- cuisines: string (nullable = true)
 |-- approx_cost(for two people): integer (nullable = true)
 |-- reviews_list: string (nullable = true)
 |-- menu_item: string (nullable = true)
 |-- listed_in(type): string (nullable = true)
 |-- listed_in(city): string (nullable = true)

None


In [5]:
df.limit(4).toPandas()

Unnamed: 0,url,address,name,online_order,book_table,rate,votes,phone,location,rest_type,dish_liked,cuisines,approx_cost(for two people),reviews_list,menu_item,listed_in(type),listed_in(city)
0,https://www.zomato.com/bangalore/jalsa-banasha...,"942, 21st Main Road, 2nd Stage, Banashankari, ...",Jalsa,Yes,Yes,4.1/5,775.0,080 42297555,,,,,,,,,
1,"+91 9743772233""",Banashankari,Casual Dining,"Pasta, Lunch Buffet, Masala Papad, Paneer Laja...","North Indian, Mughlai, Chinese",800,,('Rated 4.0','RATED\n You canÃ\x83Ã\x83Ã\x82Ã\x82Ã\x...,('Rated 5.0','RATED\n Overdelighted by the service and fo...,('Rated 4.0',,('Rated 4.0','RATED\n The place is nice and comfortable. ...,('Rated 4.0','RATED\n The place is nice and comfortable. ...
2,https://www.zomato.com/bangalore/spice-elephan...,"2nd Floor, 80 Feet Road, Near Big Bazaar, 6th ...",Spice Elephant,Yes,No,4.1/5,787.0,080 41714161,Banashankari,Casual Dining,"Momos, Lunch Buffet, Chocolate Nirvana, Thai G...","Chinese, North Indian, Thai",800.0,"""[('Rated 4.0', 'RATED\n Had been here for di...",rice was well cooked and overall was great\n\n...,('Rated 5.0','RATED\n This place just cool ? with good am...
3,https://www.zomato.com/SanchurroBangalore?cont...,"1112, Next to KIMS Medical College, 17th Cross...",San Churro Cafe,Yes,No,3.8/5,918.0,+91 9663487993,Banashankari,"Cafe, Casual Dining","Churros, Cannelloni, Minestrone Soup, Hot Choc...","Cafe, Mexican, Italian",800.0,"""[('Rated 3.0', """"RATED\n Ambience is not tha...",('Rated 3.0',"""""RATED\n \nWent there for a quick bite with ...",pasta churros and lasagne.\n\nNachos were pat...


In [9]:
df.filter(df.cuisines.isNull()).select(['name', 'cuisines']).show(5)

+---------------+--------+
|           name|cuisines|
+---------------+--------+
|          Jalsa|    null|
|  Grand Village|    null|
|  Casual Dining|    null|
|Timepass Dinner|    null|
|  Casual Dining|    null|
+---------------+--------+
only showing top 5 rows



In [11]:
from pyspark.sql.functions import *

def null_value_calc(df):
    null_columns_counts = []
    numRows = df.count()
    for k in df.columns:
        nullRows = df.where(col(k).isNull()).count()
        if(nullRows > 0):
            temp = k,nullRows,(nullRows/numRows)*100
            null_columns_counts.append(temp)
    return(null_columns_counts)

null_columns_calc_list = null_value_calc(df)
spark.createDataFrame(null_columns_calc_list, ['Column_Name', 'Null_Values_Count', 'Null_Value_Percent']).show()

+--------------------+-----------------+--------------------+
|         Column_Name|Null_Values_Count|  Null_Value_Percent|
+--------------------+-----------------+--------------------+
|                name|               85| 0.11849993029415865|
|        online_order|             8111|  11.307681583716715|
|          book_table|                2|0.002788233653980...|
|                rate|             7775|  10.839258329848041|
|               votes|            20018|  27.907430642687856|
|               phone|             1227|  1.7105813467168547|
|            location|            20054|  27.957618848459504|
|           rest_type|            20165|    28.1123658162554|
|          dish_liked|            46841|   65.30182629304335|
|            cuisines|            27305|   38.06635996096473|
|approx_cost(for t...|            43611|  60.798828941865324|
|        reviews_list|            28185|  39.293182768716015|
|           menu_item|            28611|    39.8870765370138|
|     li

In [17]:
from pyspark.sql.functions import isnan, when, count, col
nulls = df.select([count(when(isnan(c) | col(c).isNull(), c)).alias(c) for c in df.columns])
percent = df.select([format_number(((count(when(isnan(c) | col(c).isNull(), c))/df.count())*100),1).alias(c) for c in df.columns])
result = nulls.union(percent)
result.toPandas()

Unnamed: 0,url,address,name,online_order,book_table,rate,votes,phone,location,rest_type,dish_liked,cuisines,approx_cost(for two people),reviews_list,menu_item,listed_in(type),listed_in(city)
0,0.0,0.0,85.0,8111.0,2.0,7775.0,20018.0,1227.0,20054.0,20165.0,46841.0,27305.0,43611.0,28185.0,28611.0,28983.0,29344.0
1,0.0,0.0,0.1,11.3,0.0,10.8,27.9,1.7,28.0,28.1,65.3,38.1,60.8,39.3,39.9,40.4,40.9


In [21]:
df.na.drop().limit(4).toPandas()

Unnamed: 0,url,address,name,online_order,book_table,rate,votes,phone,location,rest_type,dish_liked,cuisines,approx_cost(for two people),reviews_list,menu_item,listed_in(type),listed_in(city)
0,https://www.zomato.com/bangalore/spice-elephan...,"2nd Floor, 80 Feet Road, Near Big Bazaar, 6th ...",Spice Elephant,Yes,No,4.1/5,787,080 41714161,Banashankari,Casual Dining,"Momos, Lunch Buffet, Chocolate Nirvana, Thai G...","Chinese, North Indian, Thai",800,"""[('Rated 4.0', 'RATED\n Had been here for di...",rice was well cooked and overall was great\n\n...,('Rated 5.0','RATED\n This place just cool ? with good am...
1,https://www.zomato.com/SanchurroBangalore?cont...,"1112, Next to KIMS Medical College, 17th Cross...",San Churro Cafe,Yes,No,3.8/5,918,+91 9663487993,Banashankari,"Cafe, Casual Dining","Churros, Cannelloni, Minestrone Soup, Hot Choc...","Cafe, Mexican, Italian",800,"""[('Rated 3.0', """"RATED\n Ambience is not tha...",('Rated 3.0',"""""RATED\n \nWent there for a quick bite with ...",pasta churros and lasagne.\n\nNachos were pat...
2,https://www.zomato.com/bangalore/addhuri-udupi...,"1st Floor, Annakuteera, 3rd Stage, Banashankar...",Addhuri Udupi Bhojana,No,No,3.7/5,88,+91 9620009302,Banashankari,Quick Bites,Masala Dosa,"South Indian, North Indian",300,"""[('Rated 4.0', """"RATED\n Great food and prop...",('Rated 2.0','RATED\n Reached the place at 3pm on Saturda...,('Rated 4.0'
3,https://www.zomato.com/bangalore/cafe-shuffle-...,"941, 3rd FLOOR, 21st Main, 22nd Cross, Banasha...",Cafe Shuffle,Yes,Yes,4.2/5,150,+91 9742166777,Banashankari,Cafe,"Mocktails, Peri Fries, Lasagne, Pizza, Chicken...","Cafe, Italian, Continental",600,"""[('Rated 1.0', """"RATED\n \n\nHorrible. Not ev...","you get it literally half an hour late."""")",('Rated 4.0',"""""RATED\n While this place is more common fo..."


In [28]:
og_len = df.count()
drop_len =df.na.drop().count()
print("Total Rows: ", og_len)
print("Total Rows Dropped: ", og_len-drop_len)
print("Percentage of Rows Dropped", ((og_len-drop_len)/og_len)*100)

Total Rows:  71730
Total Rows Dropped:  63124
Percentage of Rows Dropped 88.00223058692318


In [29]:
og_len = df.count()
drop_len = df.na.drop(subset=["votes"]).count()
print("Total Rows Dropped:", og_len-drop_len)
print("Percentage of Rows Dropped", ((og_len-drop_len)/og_len)*100)

Total Rows Dropped: 20018
Percentage of Rows Dropped 27.907430642687856


In [31]:
og_len =df.count()
drop_len = df.filter(df.rate.isNotNull()).count()
print("Total Rows Dropped:", og_len-drop_len)
print("Percentage of Rows Dropped", ((og_len-drop_len)/og_len)*100)

Total Rows Dropped: 7775
Percentage of Rows Dropped 10.839258329848041


In [32]:
# Fill the missing values
df.na.fill('MISSING').limit(4).toPandas()

Unnamed: 0,url,address,name,online_order,book_table,rate,votes,phone,location,rest_type,dish_liked,cuisines,approx_cost(for two people),reviews_list,menu_item,listed_in(type),listed_in(city)
0,https://www.zomato.com/bangalore/jalsa-banasha...,"942, 21st Main Road, 2nd Stage, Banashankari, ...",Jalsa,Yes,Yes,4.1/5,775.0,080 42297555,MISSING,MISSING,MISSING,MISSING,,MISSING,MISSING,MISSING,MISSING
1,"+91 9743772233""",Banashankari,Casual Dining,"Pasta, Lunch Buffet, Masala Papad, Paneer Laja...","North Indian, Mughlai, Chinese",800,,('Rated 4.0','RATED\n You canÃ\x83Ã\x83Ã\x82Ã\x82Ã\x...,('Rated 5.0','RATED\n Overdelighted by the service and fo...,('Rated 4.0',,('Rated 4.0','RATED\n The place is nice and comfortable. ...,('Rated 4.0','RATED\n The place is nice and comfortable. ...
2,https://www.zomato.com/bangalore/spice-elephan...,"2nd Floor, 80 Feet Road, Near Big Bazaar, 6th ...",Spice Elephant,Yes,No,4.1/5,787.0,080 41714161,Banashankari,Casual Dining,"Momos, Lunch Buffet, Chocolate Nirvana, Thai G...","Chinese, North Indian, Thai",800.0,"""[('Rated 4.0', 'RATED\n Had been here for di...",rice was well cooked and overall was great\n\n...,('Rated 5.0','RATED\n This place just cool ? with good am...
3,https://www.zomato.com/SanchurroBangalore?cont...,"1112, Next to KIMS Medical College, 17th Cross...",San Churro Cafe,Yes,No,3.8/5,918.0,+91 9663487993,Banashankari,"Cafe, Casual Dining","Churros, Cannelloni, Minestrone Soup, Hot Choc...","Cafe, Mexican, Italian",800.0,"""[('Rated 3.0', """"RATED\n Ambience is not tha...",('Rated 3.0',"""""RATED\n \nWent there for a quick bite with ...",pasta churros and lasagne.\n\nNachos were pat...


In [33]:
df.filter(df.name.isNull()).na.fill('No Name', subset=['name']).limit(5).toPandas()

Unnamed: 0,url,address,name,online_order,book_table,rate,votes,phone,location,rest_type,dish_liked,cuisines,approx_cost(for two people),reviews_list,menu_item,listed_in(type),listed_in(city)
0,"+91 9986692090""",BTM,No Name,"Momos, Oreo Shake","Mughlai, North Indian, Chinese, Momos",600,,('Rated 3.0','RATED\n Simple food with great north indian...,"['Fry Chicken Kabab [5 Pieces]', 'Fry Chicken ...",Delivery,Bannerghatta Road,,,,,
1,"00 805074123""",BTM,No Name,,"North Indian, Chinese, Arabian",700,,[],Delivery,Bannerghatta Road,,,,,,,
2,"+91 8971051846""",Bannerghatta Road,No Name,,"Street Food, Burger",150,,[],Delivery,Bannerghatta Road,,,,,,,
3,"080 39457777""",Bannerghatta Road,No Name,"Chicken Biryani, Hyderabadi Biryani, Rolls, Mu...","Biryani, North Indian",500,,('Rated 3.0','RATED\n too much oil in rice items'),('Rated 2.0','RATED\n salan was not provided'),('Rated 1.0',,('Rated 3.0','RATED\n ok ok biryani'),('Rated 1.0',"""""RATED\n poor test & quality... will never ..."
4,"+91 8971051846""",Bannerghatta Road,No Name,,"Street Food, Burger",150,,[],Dine-out,Bannerghatta Road,,,,,,,


In [36]:
def fill_with_mean(df, include=set()):
    stats = df.agg(*(avg(c).alias(c) for c in df.columns if c in include))
    return df.na.fill(stats.first().asDict())

updated_df = fill_with_mean(df, ["votes"])
updated_df.limit(5).toPandas()

Unnamed: 0,url,address,name,online_order,book_table,rate,votes,phone,location,rest_type,dish_liked,cuisines,approx_cost(for two people),reviews_list,menu_item,listed_in(type),listed_in(city)
0,https://www.zomato.com/bangalore/jalsa-banasha...,"942, 21st Main Road, 2nd Stage, Banashankari, ...",Jalsa,Yes,Yes,4.1/5,775,080 42297555,,,,,,,,,
1,"+91 9743772233""",Banashankari,Casual Dining,"Pasta, Lunch Buffet, Masala Papad, Paneer Laja...","North Indian, Mughlai, Chinese",800,283,('Rated 4.0','RATED\n You canÃ\x83Ã\x83Ã\x82Ã\x82Ã\x...,('Rated 5.0','RATED\n Overdelighted by the service and fo...,('Rated 4.0',,('Rated 4.0','RATED\n The place is nice and comfortable. ...,('Rated 4.0','RATED\n The place is nice and comfortable. ...
2,https://www.zomato.com/bangalore/spice-elephan...,"2nd Floor, 80 Feet Road, Near Big Bazaar, 6th ...",Spice Elephant,Yes,No,4.1/5,787,080 41714161,Banashankari,Casual Dining,"Momos, Lunch Buffet, Chocolate Nirvana, Thai G...","Chinese, North Indian, Thai",800.0,"""[('Rated 4.0', 'RATED\n Had been here for di...",rice was well cooked and overall was great\n\n...,('Rated 5.0','RATED\n This place just cool ? with good am...
3,https://www.zomato.com/SanchurroBangalore?cont...,"1112, Next to KIMS Medical College, 17th Cross...",San Churro Cafe,Yes,No,3.8/5,918,+91 9663487993,Banashankari,"Cafe, Casual Dining","Churros, Cannelloni, Minestrone Soup, Hot Choc...","Cafe, Mexican, Italian",800.0,"""[('Rated 3.0', """"RATED\n Ambience is not tha...",('Rated 3.0',"""""RATED\n \nWent there for a quick bite with ...",pasta churros and lasagne.\n\nNachos were pat...
4,https://www.zomato.com/bangalore/addhuri-udupi...,"1st Floor, Annakuteera, 3rd Stage, Banashankar...",Addhuri Udupi Bhojana,No,No,3.7/5,88,+91 9620009302,Banashankari,Quick Bites,Masala Dosa,"South Indian, North Indian",300.0,"""[('Rated 4.0', """"RATED\n Great food and prop...",('Rated 2.0','RATED\n Reached the place at 3pm on Saturda...,('Rated 4.0'


## Dataset
**Source:** https://www.kaggle.com/meinertsen/new-york-city-taxi-trip-hourly-weather-data

In [53]:
path = '/user/harishmohan/Datasets/'
df = spark.read.csv(path + 'Weather.csv', inferSchema=True, header=True)

In [54]:
df.limit(8).toPandas()

Unnamed: 0,pickup_datetime,tempm,tempi,dewptm,dewpti,hum,wspdm,wspdi,wgustm,wgusti,...,precipm,precipi,conds,icon,fog,rain,snow,hail,thunder,tornado
0,2015-12-31 00:15:00,7.8,46.0,6.1,43.0,89.0,7.4,4.6,,,...,0.5,0.02,Light Rain,rain,0,1,0,0,0,0
1,2015-12-31 00:42:00,7.8,46.0,6.1,43.0,89.0,7.4,4.6,,,...,0.8,0.03,Overcast,cloudy,0,0,0,0,0,0
2,2015-12-31 00:51:00,7.8,46.0,6.1,43.0,89.0,5.6,3.5,,,...,0.8,0.03,Overcast,cloudy,0,0,0,0,0,0
3,2015-12-31 01:51:00,7.2,45.0,5.6,42.1,90.0,7.4,4.6,,,...,0.3,0.01,Overcast,cloudy,0,0,0,0,0,0
4,2015-12-31 02:51:00,7.2,45.0,5.6,42.1,90.0,0.0,0.0,,,...,,,Overcast,cloudy,0,0,0,0,0,0
5,2015-12-31 03:28:00,6.7,44.1,5.0,41.0,89.0,7.4,4.6,,,...,,,Overcast,cloudy,0,0,0,0,0,0
6,2015-12-31 03:40:00,7.2,45.0,5.0,41.0,86.0,0.0,0.0,,,...,,,Overcast,cloudy,0,0,0,0,0,0
7,2015-12-31 03:51:00,7.2,45.0,5.0,41.0,86.0,7.4,4.6,,,...,,,Overcast,cloudy,0,0,0,0,0,0


In [55]:
print(df.printSchema())

root
 |-- pickup_datetime: timestamp (nullable = true)
 |-- tempm: double (nullable = true)
 |-- tempi: double (nullable = true)
 |-- dewptm: double (nullable = true)
 |-- dewpti: double (nullable = true)
 |-- hum: double (nullable = true)
 |-- wspdm: double (nullable = true)
 |-- wspdi: double (nullable = true)
 |-- wgustm: double (nullable = true)
 |-- wgusti: double (nullable = true)
 |-- wdird: integer (nullable = true)
 |-- wdire: string (nullable = true)
 |-- vism: double (nullable = true)
 |-- visi: double (nullable = true)
 |-- pressurem: double (nullable = true)
 |-- pressurei: double (nullable = true)
 |-- windchillm: double (nullable = true)
 |-- windchilli: double (nullable = true)
 |-- heatindexm: double (nullable = true)
 |-- heatindexi: double (nullable = true)
 |-- precipm: double (nullable = true)
 |-- precipi: double (nullable = true)
 |-- conds: string (nullable = true)
 |-- icon: string (nullable = true)
 |-- fog: integer (nullable = true)
 |-- rain: integer (nullab

In [56]:
from pyspark.sql.functions import *

def null_value_calc(df):
    null_columns_counts = []
    numRows = df.count()
    for k in df.columns: 
        nullRows = df.where(col(k).isNull()).count()
        if(nullRows > 0):
            temp = k,nullRows,(nullRows/numRows)*100
            null_columns_counts.append(temp)
    return(null_columns_counts)

null_columns_calc_list = null_values_calc(df)
spark.createDataFrame(null_columns_calc_list, ['Column_with_Null_Value', 'Null_Values_Counts', 'Null_value_Percent']).show()

+----------------------+------------------+-------------------+
|Column_with_Null_Value|Null_Values_Counts| Null_value_Percent|
+----------------------+------------------+-------------------+
|                 tempm|                 5|0.04770537162484496|
|                 tempi|                 5|0.04770537162484496|
|                dewptm|                 5|0.04770537162484496|
|                dewpti|                 5|0.04770537162484496|
|                   hum|                 5|0.04770537162484496|
|                 wspdm|               737|  7.031771777502146|
|                 wspdi|               737|  7.031771777502146|
|                wgustm|              8605|  82.10094456635817|
|                wgusti|              8605|  82.10094456635817|
|                  vism|               245| 2.3375632096174033|
|                  visi|               245| 2.3375632096174033|
|             pressurem|               239| 2.2803167636675887|
|             pressurei|               2

In [58]:
og_len = df.count()
drop_len = df.na.drop().count()
print("Total Rows in the DF: ", og_len)
print("Total Rows Dropped: ", og_len-drop_len)
print("Percentage of Rows Dropped: ", (og_len-drop_len)/og_len)

Total Rows in the DF:  10481
Total Rows Dropped:  10481
Percentage of Rows Dropped:  1.0


In [59]:
og_len = df.count()
drop_len = df.na.drop(thresh=12).count()
print("Total Rows Dropped:", og_len-drop_len)
print("Percentage of Rows Dropped", (og_len-drop_len)/og_len)

Total Rows Dropped: 5
Percentage of Rows Dropped 0.00047705371624844956


In [61]:
# Drop according to specfic columns
og_len = df.count()
drop_len = df.na.drop(subset = ["tempm"]).count()
print("Total Rows Dropped: ", og_len-drop_len)
print("Percentage of Rows Dropped", (og_len-drop_len)/og_len)

Total Rows Dropped:  5
Percentage of Rows Dropped 0.00047705371624844956


In [62]:
null_fill = df.na.fill('N/A')
null_fill.limit(4).toPandas()

Unnamed: 0,pickup_datetime,tempm,tempi,dewptm,dewpti,hum,wspdm,wspdi,wgustm,wgusti,...,precipm,precipi,conds,icon,fog,rain,snow,hail,thunder,tornado
0,2015-12-31 00:15:00,7.8,46.0,6.1,43.0,89.0,7.4,4.6,,,...,0.5,0.02,Light Rain,rain,0,1,0,0,0,0
1,2015-12-31 00:42:00,7.8,46.0,6.1,43.0,89.0,7.4,4.6,,,...,0.8,0.03,Overcast,cloudy,0,0,0,0,0,0
2,2015-12-31 00:51:00,7.8,46.0,6.1,43.0,89.0,5.6,3.5,,,...,0.8,0.03,Overcast,cloudy,0,0,0,0,0,0
3,2015-12-31 01:51:00,7.2,45.0,5.6,42.1,90.0,7.4,4.6,,,...,0.3,0.01,Overcast,cloudy,0,0,0,0,0,0


In [63]:
# Fill in NaN values with avrages
def fill_with_mean(df, include=set()):
    stats = df.agg(*(
        avg(c).alias(c) for c in df.columns if c in include
    ))
    return df.na.fill(stats.first().asDict())

updated_df = fill_with_mean(df, ["tempm", "tempi"])
updated_df.limit(5).toPandas()

Unnamed: 0,pickup_datetime,tempm,tempi,dewptm,dewpti,hum,wspdm,wspdi,wgustm,wgusti,...,precipm,precipi,conds,icon,fog,rain,snow,hail,thunder,tornado
0,2015-12-31 00:15:00,7.8,46.0,6.1,43.0,89.0,7.4,4.6,,,...,0.5,0.02,Light Rain,rain,0,1,0,0,0,0
1,2015-12-31 00:42:00,7.8,46.0,6.1,43.0,89.0,7.4,4.6,,,...,0.8,0.03,Overcast,cloudy,0,0,0,0,0,0
2,2015-12-31 00:51:00,7.8,46.0,6.1,43.0,89.0,5.6,3.5,,,...,0.8,0.03,Overcast,cloudy,0,0,0,0,0,0
3,2015-12-31 01:51:00,7.2,45.0,5.6,42.1,90.0,7.4,4.6,,,...,0.3,0.01,Overcast,cloudy,0,0,0,0,0,0
4,2015-12-31 02:51:00,7.2,45.0,5.6,42.1,90.0,0.0,0.0,,,...,,,Overcast,cloudy,0,0,0,0,0,0
