In [None]:

import pandas as pd
from io import BytesIO
from zipfile import ZipFile
from urllib.request import urlopen
import requests

# Part-1 (Load Data)

## 1.Load Zoo data

In [None]:
# Location of the raw zoo data file in the UCI Machine Learning Repository.
PATH_URL = "https://archive.ics.uci.edu/ml/machine-learning-databases/zoo/zoo.data"

# Column labels handed to `names` below, listed in the order they are
# assigned to the fields of each line.
col_ls = [
    'animal name',
    'hair', 'feathers', 'eggs', 'milk',
    'airborne', 'aquatic', 'predator', 'toothed',
    'backbone', 'breathes', 'venomous', 'fins',
    'legs', 'tail', 'domestic', 'catsize',
    'type',
]

# Every line is read as data (header=None); the labels come from col_ls.
df_zoo = pd.read_csv(PATH_URL, header=None, names=col_ls)
df_zoo.head(5)

Unnamed: 0,animal name,hair,feathers,eggs,milk,airborne,aquatic,predator,toothed,backbone,breathes,venomous,fins,legs,tail,domestic,catsize,type
0,aardvark,1,0,0,1,0,0,1,1,1,1,0,0,4,0,0,1,1
1,antelope,1,0,0,1,0,0,0,1,1,1,0,0,4,1,0,1,1
2,bass,0,0,1,0,0,1,1,1,1,0,0,1,0,1,0,0,4
3,bear,1,0,0,1,0,0,1,1,1,1,0,0,4,0,0,1,1
4,boar,1,0,0,1,0,0,1,1,1,1,0,0,4,1,0,1,1


## 2.Load sentiment data
This sentiment dataset has 2 columns:
- **Text**: the sentences written in English
- **Sentiment**: score is either 1 (for positive) or 0 (for negative)

In [None]:

# Download the UCI "sentiment labelled sentences" archive and load the Amazon
# reviews member into a DataFrame. Each line is split on a tab into
# (Text, Sentiment).
SENTIMENT_ZIP_URL = "https://archive.ics.uci.edu/ml/machine-learning-databases/00331/sentiment%20labelled%20sentences.zip"
SENTIMENT_MEMBER = 'sentiment labelled sentences/amazon_cells_labelled.txt'

# Context managers close the HTTP response and the archive even if a step fails.
with urlopen(SENTIMENT_ZIP_URL) as resp:  # open zip url
    with ZipFile(BytesIO(resp.read())) as archive:  # archive is held in memory
        # Read and decode the member in one go instead of concatenating line by line.
        sentiment_text = archive.read(SENTIMENT_MEMBER).decode('utf-8')

# One record per non-empty line. Skipping empty lines removes the empty string
# left after the final newline without assuming it is always the last element.
sentiment_records = [
    line.split('\t')  # split Text and Sentiment
    for line in sentiment_text.split('\n')
    if line
]

# create data frame
df_sentiment = pd.DataFrame.from_records(sentiment_records, columns=['Text', 'Sentiment'])
# The labels are parsed as text; convert them so the column holds the numeric
# 0/1 score and supports arithmetic such as .mean() or .sum().
df_sentiment['Sentiment'] = df_sentiment['Sentiment'].astype(int)
df_sentiment

Unnamed: 0,Text,Sentiment
0,So there is no way for me to plug it in here i...,0
1,"Good case, Excellent value.",1
2,Great for the jawbone.,1
3,Tied to charger for conversations lasting more...,0
4,The mic is great.,1
...,...,...
995,The screen does get smudged easily because it ...,0
996,What a piece of junk.. I lose more calls on th...,0
997,Item Does Not Match Picture.,0
998,The only thing that disappoint me is the infra...,0


In [None]:
# Dimensions of the sentiment frame as a (rows, columns) tuple.
df_sentiment.shape

(1000, 2)

In [None]:
# Print the index range, per-column non-null counts and dtypes, and memory usage.
df_sentiment.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 2 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   Text       1000 non-null   object
 1   Sentiment  1000 non-null   object
dtypes: object(2)
memory usage: 15.8+ KB


## 3.Load SMS Spam Collection data

In [None]:

# Download the UCI SMS Spam Collection archive and load its single data file
# into a DataFrame. Each line is split on a tab into (Sentiment, Text), where
# the first field is the ham/spam label.
SMS_ZIP_URL = "https://archive.ics.uci.edu/ml/machine-learning-databases/00228/smsspamcollection.zip"
SMS_MEMBER = 'SMSSpamCollection'

# Context managers close the HTTP response and the archive even if a step fails.
with urlopen(SMS_ZIP_URL) as resp:  # open zip url
    with ZipFile(BytesIO(resp.read())) as archive:  # archive is held in memory
        # Read and decode the member in one go instead of concatenating line by line.
        sms_text = archive.read(SMS_MEMBER).decode('utf-8')

# One record per non-empty line. Skipping empty lines removes the empty string
# left after the final newline without assuming it is always the last element.
# Split on '\n' only, so any other line-break characters inside a message are kept.
sms_records = [
    line.split('\t')  # split label and message text
    for line in sms_text.split('\n')
    if line
]

# create data frame
df_emails = pd.DataFrame.from_records(sms_records, columns=['Sentiment', 'Text'])
df_emails

Unnamed: 0,Sentiment,Text
0,ham,"Go until jurong point, crazy.. Available only ..."
1,ham,Ok lar... Joking wif u oni...
2,spam,Free entry in 2 a wkly comp to win FA Cup fina...
3,ham,U dun say so early hor... U c already then say...
4,ham,"Nah I don't think he goes to usf, he lives aro..."
...,...,...
5569,spam,This is the 2nd time we have tried 2 contact u...
5570,ham,Will ü b going to esplanade fr home?
5571,ham,"Pity, * was in mood for that. So...any other s..."
5572,ham,The guy did some bitching but I acted like i'd...


# Part-2 (API)

In [None]:
# Query the cities endpoint for country_id=SA (first page, up to 100 entries,
# ordered by city ascending) and tabulate the returned records.
API = 'https://u50g7n0cbj.execute-api.us-east-1.amazonaws.com/v2/cities?limit=100&page=1&offset=0&sort=asc&country_id=SA&order_by=city'

# A timeout keeps the cell from hanging on an unresponsive server, and
# raise_for_status() reports HTTP errors here rather than as a confusing
# failure while decoding or indexing the body below.
response_SA = requests.get(API, timeout=30)
response_SA.raise_for_status()

# Decoded JSON payload; the per-city records are stored under the 'results' key.
data_SA = response_SA.json()
df_SA = pd.DataFrame(data_SA['results'])
df_SA

Unnamed: 0,country,city,count,locations,firstUpdated,lastUpdated,parameters
0,SA,Dharan,24,1,2019-07-15T19:00:00+00:00,2019-07-16T18:00:00+00:00,[pm25]
1,SA,,3822233,2,2019-10-28T18:00:00+00:00,2021-01-20T18:00:00+00:00,[pm25]


In [None]:
# Keep only the rows whose city is not the literal string 'N/A'.
df_SA = df_SA.loc[df_SA['city'].ne('N/A')]

In [None]:
# Report the city names left in the frame and its (rows, columns) shape.
city_names = df_SA['city'].values
frame_shape = df_SA.shape
print('What cities are in the data?', city_names)
print('How many rows and columns are in the data?', frame_shape)

What cities are in the data? ['Dharan']
How many rows and columns are in the data? (1, 7)
