In [0]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix
from sklearn.metrics import roc_curve
from sklearn.metrics import mean_absolute_error as MAE
import gc
import matplotlib
import matplotlib.pyplot as plt
from IPython.display import display, HTML

In [110]:
# Mount personal Google Drive; all data files are in personal Google Drive
# account: My main account @ Gmail
from google.colab import drive
drive.mount('/content/gdrive')

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [0]:
# Use PDrive function
!pip install -U -q PyDrive

from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials

# Authenticate and create the PyDrive client.
auth.authenticate_user()
gauth = GoogleAuth()
gauth.credentials = GoogleCredentials.get_application_default()
drive = GoogleDrive(gauth)

In [112]:
# List files in root directory
file_list = drive.ListFile({'q': "'root' in parents and trashed=false"}).GetList()
for file1 in file_list:
  print('title: %s, id: %s' % (file1['title'], file1['id']))

title: https://www.goeugo.com, id: 17JhDtBrBDmyi-QUM35jM5TXoJ6_XdTbN
title: Ydata.csv, id: 1s8Kf5u9pfvGrDwv-F8PashL0tM0mV-B4
title:  對外說明簡報 , id: 1La_gNgrhH0HezUJGkobo1w3dXWAc3fRt
title: 說明會報名相關 , id: 1ZcD1tIxfiAELy7ZLERT6bFOU7i_Ts6r2
title: Colab Notebooks, id: 1kXGE13cjX-yz2222w9qTUyQMGPysUgn_
title: 照片集, id: 10c5xL8LrPRdsKAi_8Xaa4K1t29paSGCj
title: 小包團需求_空白表格_2019.docx, id: 1Ft1au82vSv8YO8JY5-olEOh1BDT-dpAk
title: 2019歐來歐去循環團.pdf, id: 1mqgf_JBjyXExI5tVKFp55gHGdq7KE_QK
title: 歐洲報價訊息一手取得_20190612.jpg, id: 1fE4e7mlNp78FtbXVSuZuz8Kb5JrL7Otl


In [113]:
# Check first 5 rows in the targeted file.
Ydata = pd.read_csv('/content/gdrive/My Drive/Colab Notebooks/23.csv', header=None, names=['Company Name', 'Person1', 'Person2', 'Title', 'Tel', 'Fax', 'Address', 'Email', 'Other'])
Ydata.head()

Unnamed: 0,Company Name,Person1,Person2,Title,Tel,Fax,Address,Email,Other
1,一陽旅行社股份有限公司,官文清,官文清,董事長,03-3336699,03-3336679,桃園市桃園區復興路389號5樓之1,suntour.taoyuan@gmail.com,
2,八德旅行社有限公司,游景樂,游象新,經理,03-3611445,03-3615153,桃園市八德區東勇北路168巷10弄6號,yupadetravel@hotmail.com,
3,力泰旅行社股份有限公司,林秋烈,林秋烈,負責人,03-3258699,03-3556568,桃園市桃園區愛八街5號1樓,a5013156@gmail.com,
4,卜鵬國際旅行社有限公司,邱贊因,邱贊因,總經理,03-3588383,03-3170002,桃園市桃園區中正路1003號2樓,cutepei0430@yahoo.com,
5,三信旅行社有限公司,張金球,張金球,總經理,03-4258017,03-4222715,桃園市中壢區中光路49號,sanhsintour@gmail.com,


In [114]:
# Drop laast column
Ydata.drop(Ydata.columns[[8]], axis=1, inplace=True)
Ydata.head()

Unnamed: 0,Company Name,Person1,Person2,Title,Tel,Fax,Address,Email
1,一陽旅行社股份有限公司,官文清,官文清,董事長,03-3336699,03-3336679,桃園市桃園區復興路389號5樓之1,suntour.taoyuan@gmail.com
2,八德旅行社有限公司,游景樂,游象新,經理,03-3611445,03-3615153,桃園市八德區東勇北路168巷10弄6號,yupadetravel@hotmail.com
3,力泰旅行社股份有限公司,林秋烈,林秋烈,負責人,03-3258699,03-3556568,桃園市桃園區愛八街5號1樓,a5013156@gmail.com
4,卜鵬國際旅行社有限公司,邱贊因,邱贊因,總經理,03-3588383,03-3170002,桃園市桃園區中正路1003號2樓,cutepei0430@yahoo.com
5,三信旅行社有限公司,張金球,張金球,總經理,03-4258017,03-4222715,桃園市中壢區中光路49號,sanhsintour@gmail.com


In [0]:
# Remove some characters in a column
Ydata['Company Name'] = Ydata['Company Name'].map(lambda x: x.rstrip('股份有限公司'))



In [116]:
# Merge 2 columns into a new one
Ydata['Name'] = Ydata['Company Name'] + Ydata['Person2']

Ydata.head()

Unnamed: 0,Company Name,Person1,Person2,Title,Tel,Fax,Address,Email,Name
1,一陽旅行社,官文清,官文清,董事長,03-3336699,03-3336679,桃園市桃園區復興路389號5樓之1,suntour.taoyuan@gmail.com,一陽旅行社官文清
2,八德旅行社,游景樂,游象新,經理,03-3611445,03-3615153,桃園市八德區東勇北路168巷10弄6號,yupadetravel@hotmail.com,八德旅行社游象新
3,力泰旅行社,林秋烈,林秋烈,負責人,03-3258699,03-3556568,桃園市桃園區愛八街5號1樓,a5013156@gmail.com,力泰旅行社林秋烈
4,卜鵬國際旅行社,邱贊因,邱贊因,總經理,03-3588383,03-3170002,桃園市桃園區中正路1003號2樓,cutepei0430@yahoo.com,卜鵬國際旅行社邱贊因
5,三信旅行社,張金球,張金球,總經理,03-4258017,03-4222715,桃園市中壢區中光路49號,sanhsintour@gmail.com,三信旅行社張金球


In [117]:
# Drop a column
Ydata = Ydata.drop(['Person1'], axis=1)

Ydata.head()

Unnamed: 0,Company Name,Person2,Title,Tel,Fax,Address,Email,Name
1,一陽旅行社,官文清,董事長,03-3336699,03-3336679,桃園市桃園區復興路389號5樓之1,suntour.taoyuan@gmail.com,一陽旅行社官文清
2,八德旅行社,游象新,經理,03-3611445,03-3615153,桃園市八德區東勇北路168巷10弄6號,yupadetravel@hotmail.com,八德旅行社游象新
3,力泰旅行社,林秋烈,負責人,03-3258699,03-3556568,桃園市桃園區愛八街5號1樓,a5013156@gmail.com,力泰旅行社林秋烈
4,卜鵬國際旅行社,邱贊因,總經理,03-3588383,03-3170002,桃園市桃園區中正路1003號2樓,cutepei0430@yahoo.com,卜鵬國際旅行社邱贊因
5,三信旅行社,張金球,總經理,03-4258017,03-4222715,桃園市中壢區中光路49號,sanhsintour@gmail.com,三信旅行社張金球


In [118]:
# Rename some columns
Ydata = Ydata.rename(index=str, columns={"Tel": "Phone 1 - Type", "Fax": "Phone 2 - Type", "Person2": "Family Name", "Company Name": "Given Name", "Title": "Occupation", "Email":"E-mail 1 - Value"})

Ydata.head()

Unnamed: 0,Given Name,Family Name,Occupation,Phone 1 - Type,Phone 2 - Type,Address,E-mail 1 - Value,Name
1,一陽旅行社,官文清,董事長,03-3336699,03-3336679,桃園市桃園區復興路389號5樓之1,suntour.taoyuan@gmail.com,一陽旅行社官文清
2,八德旅行社,游象新,經理,03-3611445,03-3615153,桃園市八德區東勇北路168巷10弄6號,yupadetravel@hotmail.com,八德旅行社游象新
3,力泰旅行社,林秋烈,負責人,03-3258699,03-3556568,桃園市桃園區愛八街5號1樓,a5013156@gmail.com,力泰旅行社林秋烈
4,卜鵬國際旅行社,邱贊因,總經理,03-3588383,03-3170002,桃園市桃園區中正路1003號2樓,cutepei0430@yahoo.com,卜鵬國際旅行社邱贊因
5,三信旅行社,張金球,總經理,03-4258017,03-4222715,桃園市中壢區中光路49號,sanhsintour@gmail.com,三信旅行社張金球


In [119]:
# Change the order
Ydata = Ydata[['Name', 'Given Name', 'Family Name', 'Occupation', 'Phone 1 - Type', 'Phone 2 - Type', 'Address', 'E-mail 1 - Value']]

Ydata.head()

Unnamed: 0,Name,Given Name,Family Name,Occupation,Phone 1 - Type,Phone 2 - Type,Address,E-mail 1 - Value
1,一陽旅行社官文清,一陽旅行社,官文清,董事長,03-3336699,03-3336679,桃園市桃園區復興路389號5樓之1,suntour.taoyuan@gmail.com
2,八德旅行社游象新,八德旅行社,游象新,經理,03-3611445,03-3615153,桃園市八德區東勇北路168巷10弄6號,yupadetravel@hotmail.com
3,力泰旅行社林秋烈,力泰旅行社,林秋烈,負責人,03-3258699,03-3556568,桃園市桃園區愛八街5號1樓,a5013156@gmail.com
4,卜鵬國際旅行社邱贊因,卜鵬國際旅行社,邱贊因,總經理,03-3588383,03-3170002,桃園市桃園區中正路1003號2樓,cutepei0430@yahoo.com
5,三信旅行社張金球,三信旅行社,張金球,總經理,03-4258017,03-4222715,桃園市中壢區中光路49號,sanhsintour@gmail.com


In [120]:
# Drop a column
Ydata = Ydata.drop(['Address'], axis=1)

# Add some columns
Ydata['Group Membership'] = "桃園旅行社 _201907 ::: * myContacts"

Ydata['E-mail 1 - Type'] = 'Work'

Ydata['Phone 1 - Value'] = 'Work'
              
Ydata['Phone 2 - Value'] = 'Fax'

Ydata.head()

Unnamed: 0,Name,Given Name,Family Name,Occupation,Phone 1 - Type,Phone 2 - Type,E-mail 1 - Value,Group Membership,E-mail 1 - Type,Phone 1 - Value,Phone 2 - Value
1,一陽旅行社官文清,一陽旅行社,官文清,董事長,03-3336699,03-3336679,suntour.taoyuan@gmail.com,桃園旅行社 _201907 ::: * myContacts,Work,Work,Fax
2,八德旅行社游象新,八德旅行社,游象新,經理,03-3611445,03-3615153,yupadetravel@hotmail.com,桃園旅行社 _201907 ::: * myContacts,Work,Work,Fax
3,力泰旅行社林秋烈,力泰旅行社,林秋烈,負責人,03-3258699,03-3556568,a5013156@gmail.com,桃園旅行社 _201907 ::: * myContacts,Work,Work,Fax
4,卜鵬國際旅行社邱贊因,卜鵬國際旅行社,邱贊因,總經理,03-3588383,03-3170002,cutepei0430@yahoo.com,桃園旅行社 _201907 ::: * myContacts,Work,Work,Fax
5,三信旅行社張金球,三信旅行社,張金球,總經理,03-4258017,03-4222715,sanhsintour@gmail.com,桃園旅行社 _201907 ::: * myContacts,Work,Work,Fax


In [121]:
# Add some characters 
Ydata['Name'] = '桃園' + Ydata['Name'].astype(str)

# Change the order
Ydata = Ydata[['Name','Given Name','Family Name','Occupation','Group Membership','Phone 1 - Type','Phone 1 - Value','Phone 2 - Type','Phone 2 - Value','E-mail 1 - Value','E-mail 1 - Type']]

Ydata.head()

Unnamed: 0,Name,Given Name,Family Name,Occupation,Group Membership,Phone 1 - Type,Phone 1 - Value,Phone 2 - Type,Phone 2 - Value,E-mail 1 - Value,E-mail 1 - Type
1,桃園一陽旅行社官文清,一陽旅行社,官文清,董事長,桃園旅行社 _201907 ::: * myContacts,03-3336699,Work,03-3336679,Fax,suntour.taoyuan@gmail.com,Work
2,桃園八德旅行社游象新,八德旅行社,游象新,經理,桃園旅行社 _201907 ::: * myContacts,03-3611445,Work,03-3615153,Fax,yupadetravel@hotmail.com,Work
3,桃園力泰旅行社林秋烈,力泰旅行社,林秋烈,負責人,桃園旅行社 _201907 ::: * myContacts,03-3258699,Work,03-3556568,Fax,a5013156@gmail.com,Work
4,桃園卜鵬國際旅行社邱贊因,卜鵬國際旅行社,邱贊因,總經理,桃園旅行社 _201907 ::: * myContacts,03-3588383,Work,03-3170002,Fax,cutepei0430@yahoo.com,Work
5,桃園三信旅行社張金球,三信旅行社,張金球,總經理,桃園旅行社 _201907 ::: * myContacts,03-4258017,Work,03-4222715,Fax,sanhsintour@gmail.com,Work


In [0]:
#　Ｏutput to a csv file.
Ydata.to_csv("/content/gdrive/My Drive/Colab Notebooks/Ydata.csv")