# Code a day keeps the confusion at bay

In [1]:
#for all required libraries
import pandas as pd
import numpy as np
import random

### Day 1: Iterating through a pandas dataframe 23/09/18
For every row I want to be able to access its elements (values in cells) by the name of the columns. 

In [2]:
# One dict per row: the keys become the column names, the values the cells.
df = [
    {'c1': 1, 'c2': 2, 'c3': 'a'},
    {'c1': 3, 'c2': 4, 'c3': 'b'},
    {'c1': 5, 'c2': 6, 'c3': 'c'},
]
pdf = pd.DataFrame(df)
pdf

Unnamed: 0,c1,c2,c3
0,1,2,a
1,3,4,b
2,5,6,c


In [3]:
# Positional iteration: fetch each row once with iloc, then read cells by column name.
for position in range(len(pdf)):
    current_row = pdf.iloc[position]
    print(current_row['c1'], current_row['c2'])

1 2
3 4
5 6


In [4]:
# iterrows() yields (index label, row Series) pairs; only the row is used here.
for label, record in pdf.iterrows():
    print(record['c1'], record['c2'])

1 2
3 4
5 6


### Day 2: Slicing data with loc in a pandas dataframe 24/09/18
loc will access a group of rows and columns by label(s), or a boolean array

In [4]:
# Replace the default integer index with string row labels, so the .loc
# examples that follow can select rows by label.
pdf.index = ['r1', 'r2', 'r3']

In [5]:
pdf.loc[['r1']]  # a list of labels ([[...]]) returns a DataFrame, even for a single row

Unnamed: 0,c1,c2,c3
r1,1,2,a


In [6]:
pdf.loc['r1']  # a single label ([] or [()]) returns that row as a Series

c1    1
c2    2
c3    a
Name: r1, dtype: object

In [7]:
# Label slice on the rows plus one column label: both slice endpoints
# ('r1' and 'r2') are included, and the result is a Series.
pdf.loc['r1':'r2', 'c3']

r1    a
r2    b
Name: c3, dtype: object

In [8]:
# A boolean mask passed to .loc needs exactly one entry per row (three here);
# the rows whose entry is True are returned. A shorter mask raises an
# IndexError on current pandas versions.
pdf.loc[[False, True, False]]

Unnamed: 0,c1,c2,c3
r2,3,4,b


In [9]:
# The condition produces a boolean Series that picks the rows; the column
# list ['c1'] picks the columns and keeps the result a DataFrame.
pdf.loc[pdf['c1'] > 4, ['c1']] 

Unnamed: 0,c1
r3,5


In [10]:
# .loc also accepts a callable: here the lambda yields a boolean Series,
# and .loc returns the rows where it is True (a DataFrame, not the mask).
pdf.loc[lambda frame: frame['c2'] == 6]

Unnamed: 0,c1,c2,c3
r3,5,6,c


In [11]:
# Assign the scalar 10 to column c1 of rows r1 and r2; this modifies pdf
# itself, so frames displayed by earlier cells no longer match its state.
pdf.loc[['r1', 'r2'], ['c1']] = 10
pdf

Unnamed: 0,c1,c2,c3
r1,10,2,a
r2,10,4,b
r3,5,6,c


In [12]:
# Overwrite every column of the rows that match the condition with 1
# (this includes the string column c3).
pdf.loc[pdf['c1'] > 9] = 1
# Overwrite c2 for the rows up to and including label 'r2'. .loc is
# label-based, so the slice is written with a row label; an integer slice
# such as :2 is positional and belongs to .iloc.
pdf.loc[:'r2', 'c2'] = 5
pdf

Unnamed: 0,c1,c2,c3
r1,1,5,1
r2,1,5,1
r3,5,6,c


### Day 3: creating a multi-index random series/dataframe with pandas and numpy

In [7]:
# Two parallel label lists of equal length form a two-level (outer, inner) index.
# The values come from NumPy's global RNG; no seed is set in the visible cells,
# so they differ from run to run.
outer = ['a', 'b', 'b', 'c', 'c', 'c', 'd', 'd', 'd']
inner = [1, 1, 2, 1, 2, 3, 1, 2, 3]
s = pd.Series(np.random.randn(9), index=[outer, inner])

In [8]:
# Display the Series; the outer index level is printed only where it changes.
s

a  1   -1.195735
b  1    0.232250
   2    1.235262
c  1    1.117630
   2   -0.324921
   3    1.758867
d  1    0.763433
   2    0.591179
   3   -1.557379
dtype: float64

### Day 4: List Comprehensions

In [3]:
listy = [1, 2, 3, 4, 5, 6, 7, 8, 9]
# Keep the even numbers. The remainder is compared with ==, because `is`
# tests object identity rather than value and only happens to work for
# small integers.
evens = [x for x in listy if x % 2 == 0]
evens

[2, 4, 6, 8]

The basic form of a list comprehension is:
#### `expression for item in list if conditional`

In [83]:
# Squares of the integers 0 through 9
squares = [n ** 2 for n in range(10)]
squares

[0, 1, 4, 9, 16, 25, 36, 49, 64, 81]

In [94]:
# Sample text for the comprehension examples; the trailing words
# 'radar elle racecar' are palindromes.
quote = 'Nothing in life is to be feared, it is only to be understood. Now is the time to understand more, so that we may fear less. ― Marie Curie, radar elle racecar'

In [95]:
# Split the string on whitespace into a list of words; punctuation stays
# attached to its word (e.g. 'more,').
word_list = quote.split()

In [96]:
# Keep the words that contain a lowercase 'm' (the membership test is case-sensitive)
[word for word in word_list if 'm' in word]

['time', 'more,', 'may']

In [97]:
# Palindromes: keep each word that reads the same when reversed
# (single-character tokens qualify trivially).
[word for word in word_list if word == ''.join(reversed(word))]

['―', 'radar', 'elle', 'racecar']

### Day 5: Scrape text from the web

In [38]:
import requests
from bs4 import BeautifulSoup
from urllib.robotparser import RobotFileParser

In [39]:
# Fetch the page. The timeout stops the cell from hanging forever if the
# server never answers (requests waits indefinitely by default).
r = requests.get('http://www.shakespeares-sonnets.com/all.php', timeout=10)
print(r.status_code)  # 200 means the request succeeded; it says nothing about permission to scrape
r.headers

200


{'Date': 'Sun, 07 Oct 2018 15:17:21 GMT', 'Server': 'Apache', 'Vary': 'Accept-Encoding', 'Content-Encoding': 'gzip', 'Keep-Alive': 'timeout=2, max=100', 'Connection': 'Keep-Alive', 'Transfer-Encoding': 'chunked', 'Content-Type': 'text/html; charset=UTF-8'}

In [40]:
# Use the robots.txt parser to check whether scraping the page is allowed.
# The parser must be pointed at the site's robots.txt file itself, not at the
# page to be scraped; can_fetch() then tests the page URL against the rules
# that were read.
rp = RobotFileParser()
rp.set_url('http://www.shakespeares-sonnets.com/robots.txt')
rp.read()
rp.can_fetch("*", 'http://www.shakespeares-sonnets.com/all.php')

True

In [41]:
# Parse the response body with the lxml parser and keep only the extracted
# text: despite its name, `soup` holds the page text (it is written to a text
# file in the next cell), not a BeautifulSoup object.
text = r.content
soup = BeautifulSoup(text, 'lxml').text

In [42]:
# Save the extracted text to disk as UTF-8 so it can be analysed later
# without fetching the page again.
with open('sonnets.txt', 'w', encoding='utf-8') as f_out:
    f_out.write(soup)

### Day 6: Text analysis

In [15]:
# Read the file with the encoding it was written in (UTF-8). Without an
# explicit encoding, open() falls back to the platform default, which can
# fail or garble non-ASCII characters.
with open('sonnets.txt', encoding='utf-8') as f:
    text = f.read()

In [21]:
def letter_count(text, letter, case_sensitive=True):
    '''
    (str, str[, bool]) -> int
    Return the number of occurrences of a single character in the given text.

    text: the text to search; it is scanned one character at a time.
    letter: the character to count. A string longer than one character
        never matches, so the result is 0.
    case_sensitive: when True (the default) 'a' and 'A' are counted
        separately; when False both are counted as the same letter.
    '''

    if case_sensitive:
        return sum(1 for ch in text if ch == letter)

    # Lowercase each character individually so that the comparison stays
    # character-by-character.
    target = letter.lower()
    return sum(1 for ch in text if ch.lower() == target)

In [23]:
# Count the occurrences of lowercase 'v' in the text read from sonnets.txt
letter_count(text, 'v')

944

In [24]:
# Total number of characters in the text (used as the denominator for the percentages)
len(text)

95077

What percentage of the text does each letter account for?

In [33]:
# Share of every lowercase letter in the text, as a percentage of all characters.
total_chars = len(text)  # the denominator is the same for every letter
for letter in 'abcdefghijklmnopqrstuvwxyz':
    share = 100 * letter_count(text, letter) / total_chars
    print('{} - {}%'.format(letter, round(share, 2)))

a - 4.87%
b - 1.15%
c - 1.41%
d - 2.9%
e - 9.88%
f - 1.65%
g - 1.44%
h - 5.3%
i - 4.52%
j - 0.07%
k - 0.58%
l - 3.23%
m - 2.15%
n - 4.77%
o - 5.98%
p - 1.06%
q - 0.05%
r - 4.45%
s - 5.23%
t - 7.2%
u - 2.45%
v - 0.99%
w - 1.73%
x - 0.07%
y - 2.07%
z - 0.02%
