In [3]:
import numpy as np # mathematics 
import pandas as pd # data operations 
import seaborn as sns # data visualization 
import matplotlib.pyplot as plt # plotting graph, chart, etc
%matplotlib inline

# Sample dataframe

In [8]:
# read one of the csv
df = pd.read_csv("dataset/hotels/db_hotel_bali.csv")
df.head()

array(['Unnamed: 0', 'Hotel Image', 'Hotel Name', 'Tipe Properti', 'City',
       'Address', 'Hotel Rating', 'Min', 'Max', 'Lattitute', 'Longitude'],
      dtype=object)

# Concat all csv files of hotels

In [22]:
# concat all csv to hotels_dataset.csv 
import glob

interesting_files = glob.glob("dataset/hotels/*.csv")
header_saved = False
with open('dataset/hotels_dataset.csv','w') as fout:
    for filename in interesting_files:
        with open(filename) as fin:
            header = next(fin)
            if not header_saved:
                fout.write(header)
                header_saved = True
            for line in fin:
                fout.write(line)

# giving column names for new csv
column_name = pd.read_csv("dataset/hotels/db_hotel_bali.csv").columns.values
df = pd.read_csv('dataset/hotels_dataset.csv')
df.columns = column_name
df.head()

Unnamed: 0.1,Unnamed: 0,Hotel Image,Hotel Name,Tipe Properti,City,Address,Hotel Rating,Min,Max,Lattitute,Longitude
0,2,https://s-light.tiket.photos/t/01E25EBZS3W0FY9...,D Carol Hotel Surabaya,Hotel,Bubutan Surabaya,Jl. Gundih No. 8. Gundih. Kec. Bubutan. Kota S...,4.2,195000,224250,-7.247228,112.72946
1,3,https://s-light.tiket.photos/t/01E25EBZS3W0FY9...,Choice City Hotel,Hotel,Bubutan Surabaya,Bubutan. Surabaya City. East Java 60174. Indon...,4.4,269997,374996,-7.25599,112.733686
2,4,https://s-light.tiket.photos/t/01E25EBZS3W0FY9...,G Suites Hotel By AMITHYA,Hotel,Gubeng Surabaya,Jl. Raya Gubeng No.43. Gubeng. Kec. Gubeng. Ko...,4.0,343000,2254000,-7.273623,112.747475
3,5,https://s-light.tiket.photos/t/01E25EBZS3W0FY9...,Royal Regantris Cendana formerly Royal Singosa...,Hotel,Tegalsari Surabaya,Jl. Kombes Pol. Moh. Duryat No.6. Tegalsari. K...,4.3,389996,1199988,-7.268379,112.739621
4,6,https://s-light.tiket.photos/t/01E25EBZS3W0FY9...,Livinn Taman Melati Surabaya,Hotel,Mulyorejo Surabaya,Jl. Mulyorejo Utara No.201. RT.006/RW.001. Mul...,4.1,224999,249999,-7.261569,112.78547


# details

In [23]:
shape = df.shape
data_types = df.dtypes

print("shape : ", shape, "\n\ndata types : \n", data_types)

shape :  (621, 11) 

data types : 
 Unnamed: 0         int64
Hotel Image       object
Hotel Name        object
Tipe Properti     object
City              object
Address           object
Hotel Rating      object
Min                int64
Max                int64
Lattitute        float64
Longitude        float64
dtype: object


# dropping duplicate rows

In [27]:
duplicate_rows_df = df[df.duplicated()]
print("number of duplicates : ", duplicate_rows_df)

number of duplicates :  Empty DataFrame
Columns: [Unnamed: 0, Hotel Image, Hotel Name, Tipe Properti, City, Address, Hotel Rating, Min, Max, Lattitute, Longitude]
Index: []


In [25]:
df.count()

Unnamed: 0       621
Hotel Image      621
Hotel Name       621
Tipe Properti    621
City             621
Address          616
Hotel Rating     621
Min              621
Max              621
Lattitute        621
Longitude        621
dtype: int64

In [26]:
df = df.drop_duplicates()

# dropping the missing or null values

In [30]:
print(df.isnull().sum())

Unnamed: 0       0
Hotel Image      0
Hotel Name       0
Tipe Properti    0
City             0
Address          0
Hotel Rating     0
Min              0
Max              0
Lattitute        0
Longitude        0
dtype: int64


In [29]:
df = df.dropna()
df.count()

Unnamed: 0       616
Hotel Image      616
Hotel Name       616
Tipe Properti    616
City             616
Address          616
Hotel Rating     616
Min              616
Max              616
Lattitute        616
Longitude        616
dtype: int64

# get valuable data for MD

In [31]:
image_data = df["Hotel Image"]
image_data.head()

0    https://s-light.tiket.photos/t/01E25EBZS3W0FY9...
1    https://s-light.tiket.photos/t/01E25EBZS3W0FY9...
2    https://s-light.tiket.photos/t/01E25EBZS3W0FY9...
3    https://s-light.tiket.photos/t/01E25EBZS3W0FY9...
4    https://s-light.tiket.photos/t/01E25EBZS3W0FY9...
Name: Hotel Image, dtype: object

In [32]:
image_data.to_csv('image_data.csv')

# Delete unused columns

In [33]:
df.head()

Unnamed: 0.1,Unnamed: 0,Hotel Image,Hotel Name,Tipe Properti,City,Address,Hotel Rating,Min,Max,Lattitute,Longitude
0,2,https://s-light.tiket.photos/t/01E25EBZS3W0FY9...,D Carol Hotel Surabaya,Hotel,Bubutan Surabaya,Jl. Gundih No. 8. Gundih. Kec. Bubutan. Kota S...,4.2,195000,224250,-7.247228,112.72946
1,3,https://s-light.tiket.photos/t/01E25EBZS3W0FY9...,Choice City Hotel,Hotel,Bubutan Surabaya,Bubutan. Surabaya City. East Java 60174. Indon...,4.4,269997,374996,-7.25599,112.733686
2,4,https://s-light.tiket.photos/t/01E25EBZS3W0FY9...,G Suites Hotel By AMITHYA,Hotel,Gubeng Surabaya,Jl. Raya Gubeng No.43. Gubeng. Kec. Gubeng. Ko...,4.0,343000,2254000,-7.273623,112.747475
3,5,https://s-light.tiket.photos/t/01E25EBZS3W0FY9...,Royal Regantris Cendana formerly Royal Singosa...,Hotel,Tegalsari Surabaya,Jl. Kombes Pol. Moh. Duryat No.6. Tegalsari. K...,4.3,389996,1199988,-7.268379,112.739621
4,6,https://s-light.tiket.photos/t/01E25EBZS3W0FY9...,Livinn Taman Melati Surabaya,Hotel,Mulyorejo Surabaya,Jl. Mulyorejo Utara No.201. RT.006/RW.001. Mul...,4.1,224999,249999,-7.261569,112.78547


In [39]:
df1 = df
df1.drop(['Hotel Image'], inplace=True, axis=1)

KeyError: ignored

In [40]:
df1.head()

Unnamed: 0.1,Unnamed: 0,Hotel Name,Tipe Properti,City,Address,Hotel Rating,Min,Max,Lattitute,Longitude
0,2,D Carol Hotel Surabaya,Hotel,Bubutan Surabaya,Jl. Gundih No. 8. Gundih. Kec. Bubutan. Kota S...,4.2,195000,224250,-7.247228,112.72946
1,3,Choice City Hotel,Hotel,Bubutan Surabaya,Bubutan. Surabaya City. East Java 60174. Indon...,4.4,269997,374996,-7.25599,112.733686
2,4,G Suites Hotel By AMITHYA,Hotel,Gubeng Surabaya,Jl. Raya Gubeng No.43. Gubeng. Kec. Gubeng. Ko...,4.0,343000,2254000,-7.273623,112.747475
3,5,Royal Regantris Cendana formerly Royal Singosa...,Hotel,Tegalsari Surabaya,Jl. Kombes Pol. Moh. Duryat No.6. Tegalsari. K...,4.3,389996,1199988,-7.268379,112.739621
4,6,Livinn Taman Melati Surabaya,Hotel,Mulyorejo Surabaya,Jl. Mulyorejo Utara No.201. RT.006/RW.001. Mul...,4.1,224999,249999,-7.261569,112.78547
