# In [None]:
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings("ignore")

def read_data_from_csv():
    """Load the source dataset.

    Returns:
        pandas.DataFrame: the contents of ``zomato.csv`` (resolved against
        the current working directory), parsed with pandas' default options.
    """
    csv_path = 'zomato.csv'
    return pd.read_csv(csv_path)


def remove_unwanted_columns():
    """Return the source dataset without the ``address`` and ``phone`` columns.

    Raises:
        KeyError: if either column is absent from the loaded data
            (pandas' default ``errors='raise'`` behaviour for ``drop``).
    """
    raw = read_data_from_csv()
    unwanted = ["address", "phone"]
    return raw.drop(columns=unwanted)


def rename_columns():
    """Return the trimmed dataset with three columns given shorter names.

    ``rate`` becomes ``rating``, ``approx_cost(for two people)`` becomes
    ``approx_cost`` and ``listed_in(type)`` becomes ``type``. Source names
    that are not present are left untouched (pandas' default for ``rename``).
    """
    new_names = {
        "rate": "rating",
        'approx_cost(for two people)': "approx_cost",
        'listed_in(type)': "type",
    }
    trimmed = remove_unwanted_columns()
    return trimmed.rename(columns=new_names)


#task3: handle null values of each column
def null_value_check():
    """Return the renamed dataset with missing values handled per column.

    Rows whose ``name`` is missing are dropped. In every other handled
    column, missing values are replaced by a placeholder: ``0`` for
    ``rating``, ``votes`` and ``approx_cost``, and the string ``"NA"`` for
    the remaining columns.

    Returns:
        pandas.DataFrame: the dataset with the placeholders filled in.

    Raises:
        KeyError: if any of the handled columns is absent.
    """
    hotels = rename_columns()
    hotels.dropna(subset=["name"], inplace=True)

    placeholders = {
        'online_order': "NA",
        'book_table': "NA",
        'rating': 0,
        'votes': 0,
        'location': "NA",
        'rest_type': "NA",
        'dish_liked': "NA",
        'cuisines': "NA",
        'approx_cost': 0,
        'type': "NA",
    }
    # Assign the filled column back instead of calling
    # ``hotels[col].fillna(..., inplace=True)``: that chained in-place form
    # mutates a temporary Series and leaves the frame unchanged when pandas
    # Copy-on-Write is active.
    for column, placeholder in placeholders.items():
        hotels[column] = hotels[column].fillna(placeholder)
    return hotels


#task4: find and remove duplicate rows in the dataset
def find_duplicates():
    """Return the null-handled dataset with duplicate rows removed.

    Two rows count as duplicates only when they match on every column; the
    first occurrence of each is the one that is kept.
    """
    filled = null_value_check()
    return filled.drop_duplicates(keep='first')


#task5 removing irrelevant text from all the columns
def removing_irrelevant_text():
    """Return the de-duplicated dataset without rows containing "RATED"/"Rated".

    Each listed column is scanned in turn and only rows whose value does
    not match the marker pattern are kept.

    NOTE: the ``== False`` comparison is deliberate and must not be swapped
    for ``~mask``. ``str.contains`` yields NaN for cells that are not
    strings, and ``NaN == False`` is False, so such rows are dropped as well.
    """
    marker = 'RATED|Rated'
    scanned_columns = (
        'name',
        'online_order',
        'book_table',
        'rating',
        'votes',
        'location',
        'rest_type',
        'dish_liked',
        'cuisines',
        'approx_cost',
        'type',
    )
    cleaned = find_duplicates()
    for column in scanned_columns:
        has_marker = cleaned[column].str.contains(marker)
        cleaned = cleaned[has_marker == False]
    return cleaned


#task6: check for unique values in each column and handle the irrelevant values
def check_for_unique_values():
    """Return the dataset with ``online_order`` validated and ``rating`` normalised.

    Steps:
      * keep only rows whose ``online_order`` contains "Yes" or "No";
      * in ``rating``, turn "NEW" into 0, strip the "/5" suffix, and turn
        "-" into a missing value;
      * replace every missing ``rating`` with 0.

    Returns:
        pandas.DataFrame: the filtered frame with the cleaned ``rating``.
    """
    hotels = removing_irrelevant_text()
    # ``== True`` also discards rows where ``str.contains`` produced NaN.
    # ``.copy()`` makes the result an independent frame so the column
    # assignment below does not write into a slice of the parent.
    hotels = hotels[hotels['online_order'].str.contains('Yes|No') == True].copy()

    rating = hotels["rating"].replace("NEW", 0)
    # "/5" is a literal suffix, not a pattern. Non-string cells (such as
    # the 0 written above) come out of the ``.str`` accessor as NaN and are
    # restored by the final fillna.
    rating = rating.str.replace("/5", "", regex=False)
    rating = rating.replace("-", np.nan)
    # Assign the filled Series back; a chained
    # ``hotels["rating"].fillna(0, inplace=True)`` does not update the frame
    # when pandas Copy-on-Write is active.
    hotels["rating"] = rating.fillna(0)
    return hotels


#task7: remove the unknown character from the dataset and export it to "zomatocleaned.csv"
def remove_the_unknown_character():
    """Strip mis-encoded character runs from ``name`` and export the result.

    Every match of the regular expression ``[Ãx][^A-Za-z]+`` in the ``name``
    column is removed, then the frame is written to ``zomatocleaned.csv``
    in the current working directory (index column included).

    Returns:
        pandas.DataFrame: the cleaned frame that was written to disk.
    """
    dataframe = check_for_unique_values()
    # ``regex=True`` is required: since pandas 2.0 ``Series.str.replace``
    # treats the pattern as a literal string by default, so without it the
    # character class would never match and nothing would be removed.
    # NOTE(review): the class also matches a plain "x" followed by
    # non-letters (e.g. "x 8"); confirm that is intended.
    dataframe["name"] = dataframe["name"].str.replace(
        r'[Ãx][^A-Za-z]+', '', regex=True
    )
    # Export the cleaned dataset to a new CSV file named "zomatocleaned.csv".
    dataframe.to_csv('zomatocleaned.csv')
    return dataframe


# Check that the MySQL table is created using "zomatocleaned.csv".
# Use this final dataset and upload it to the provided database for performing analysis in MySQL.
# To run this task, first run the application from the terminal to create the table named 'Zomato', and then run the tests.
def start():
    """Run the whole cleaning pipeline for its CSV-export side effect.

    The cleaned frame returned by the last step is discarded.
    """
    remove_the_unknown_character()
    return None

def task_runner():
    """Entry point: delegate to ``start`` and hand back its result (None)."""
    return start()
