## Python IRL

### Intro and environment setup

In [2]:
from IPython.display import HTML

# Render an iframe pointing at the Loom embed URL for this section as the cell's output.
# The wrapper div (height 0 + percentage padding-bottom) together with the
# absolutely positioned 100% x 100% iframe keeps a fixed aspect ratio while the
# embed scales with the width of the output area.
HTML("""<div style="position: relative; padding-bottom: 42.1792618629174%; height: 0;">
<iframe src="https://www.loom.com/embed/b24871c305494b97946a2aa234ccdec4" frameborder="0" 
webkitallowfullscreen mozallowfullscreen allowfullscreen style="position: absolute; top: 0; 
left: 0; width: 100%; height: 100%;"></iframe></div>""")

### Let's fake some data 

In [10]:
# Render an iframe pointing at the Loom embed URL for the "fake some data" section.
# Same layout as the other embeds: a zero-height wrapper whose padding-bottom
# percentage fixes the aspect ratio, with the iframe stretched to fill it.
HTML('''<div style="position: relative; padding-bottom: 42.1875%; 
height: 0;"><iframe src="https://www.loom.com/embed/4ed6d1c48bc04143a441ae93087eea8d" 
frameborder="0" webkitallowfullscreen mozallowfullscreen allowfullscreen style="position: absolute; 
top: 0; left: 0; width: 100%; height: 100%;"></iframe></div>''')

In [11]:
from faker import Faker
import pandas as pd 

# Seed Faker's shared random generator *before* creating the instance so that
# "Restart Kernel -> Run All" regenerates the exact same fake contacts; without
# a seed every run yields different names/emails and the downstream examples
# (e.g. filtering on a specific domain) are not reproducible.
Faker.seed(42)
fake = Faker()

N_CONTACTS = 500  # number of fake contact rows to generate

# Four parallel lists: the i-th entry of each list belongs to the same contact.
names = []
phones = []
emails = []
companies = []

# Draw one value per field on every iteration. Each field is generated
# independently, so a contact's email is not derived from their name or company.
for _ in range(N_CONTACTS):
    names.append(fake.name())
    phones.append(fake.phone_number())
    emails.append(fake.email())
    companies.append(fake.company())

### pandas is useful

In [70]:
# Render an iframe pointing at the Loom embed URL for the pandas section.
# The zero-height wrapper with percentage padding-bottom fixes the aspect ratio;
# the absolutely positioned iframe fills that box.
HTML('''<div style="position: relative; padding-bottom: 42.1792618629174%; height: 0;">
<iframe src="https://www.loom.com/embed/c0ea0f2f6c9e4269bf87d6d6ebc34784" 
frameborder="0" webkitallowfullscreen mozallowfullscreen allowfullscreen style="position: absolute; 
top: 0; left: 0; width: 100%; height: 100%;"></iframe></div>''')

In [84]:
import numpy as np

# Tour of common pandas operations on the fake contact data.
# NOTE: under IPython's default settings only the LAST bare expression of a cell
# is rendered, so the intermediate bare expressions below (group by, remove
# nulls, to_dict) are evaluated but not displayed; assign them to a name or move
# them to their own cell to inspect the result.

# One row per fake contact, built from the four parallel lists.
contacts_df = pd.DataFrame(
    zip(names, phones, emails, companies),
    columns=['Name', 'Phone', 'Email', 'Company'],
)
# The first 50 names only; used below to demonstrate merge and concat.
names_df = pd.DataFrame(names[:50], columns=['Name'])

# Boolean masking

# regex=False: match the literal text 'yahoo.com' (by default the pattern is a
# regular expression, where '.' would match any character, e.g. 'yahooXcom').
# na=False: a missing email yields False instead of NaN, which np.where would
# otherwise treat as truthy and label 'yahoo'.
is_yahoo_mask = contacts_df.Email.str.contains('yahoo.com', regex=False, na=False)
contacts_df['is_yahoo'] = np.where(is_yahoo_mask, 'yahoo', 'not yahoo')

# Merging datasets

# Default inner join on Name: keeps the contacts whose Name appears in names_df.
# Duplicate names on either side produce one output row per matching pair.
merged_df = pd.merge(contacts_df, names_df, on=['Name'])

# String operations on dataframe

# Everything after the first '@' up to the next '@' (element 1 of the split).
contacts_df['domain'] = contacts_df.Email.str.split('@').str.get(1)

# Apply a lambda

# Substring test: True when '.com' occurs anywhere in the domain.
contacts_df['has .com'] = contacts_df.domain.apply(lambda x: '.com' in x)

# Group by

# Groups by the first whitespace-separated token of Name and keeps the first
# 5 rows of each group (original row order is preserved by head()).
contacts_df.groupby(contacts_df.Name.str.split().str.get(0)).head(5)

# Add columns

# A scalar assignment broadcasts the same value to every row.
names_df['location'] = 'Seattle'

# Concat dataframes

# Rows coming from contacts_df have no 'location' column, so they get NaN there.
all_existing_contacts_df = pd.concat([contacts_df[['Name']], names_df], ignore_index=True)

# Remove nulls

# Keeps only the rows that have a location (i.e. the ones that came from names_df).
all_existing_contacts_df[all_existing_contacts_df.location.notnull()]

# Push into dictionary

# 'records' orientation: a list with one {column: value} dict per row.
contacts_df.to_dict('records')

# Push series to list

contacts_df.Email.tolist()

### Solving real problems

In [81]:
# Render an iframe pointing at the Loom embed URL for the "solving real problems" section.
# The zero-height wrapper with percentage padding-bottom fixes the aspect ratio;
# the absolutely positioned iframe fills that box.
HTML('''<div style="position: relative; padding-bottom: 42.1875%; 
height: 0;"><iframe src="https://www.loom.com/embed/b8b0bbb15b3b4122bc18da98d23f97de" 
frameborder="0" webkitallowfullscreen mozallowfullscreen allowfullscreen 
style="position: absolute; top: 0; left: 0; 
width: 100%; height: 100%;"></iframe></div>''')

In [82]:
# Request 1: "Can we exclude free domains?"
# Drop every contact whose email domain is one of the free webmail providers.
# Note that this rebinds contacts_df to the filtered frame.

free_domains = ['hotmail.com', 'gmail.com', 'yahoo.com']

uses_free_domain = contacts_df['domain'].isin(free_domains)
contacts_df = contacts_df.loc[~uses_free_domain]

# Request 2: "Can we only keep one person per company and add them to our CRM?"
# The first row of each Company group, exported as a list of per-row dicts.

first_contact_per_company = contacts_df.groupby('Company').head(1)
first_contact_per_company.to_dict(orient='records')

# Request 3: "Can we remove people from a list that don't have a phone number or have this domain?"
# Removing "no phone OR this domain" is the same as keeping "has phone AND not this domain".
has_phone = contacts_df['Phone'].notna()
not_excluded_domain = contacts_df['domain'].ne('wright.com')
contacts_df.loc[has_phone & not_excluded_domain]

Unnamed: 0,Name,Phone,Email,Company,is_yahoo,domain,has .com
3,Gabriel Thomas,+1-391-828-1068x69900,gordonangela@vasquez.org,Calderon PLC,not yahoo,vasquez.org,False
6,Robert Ball,133-768-4628x89391,angelamarquez@herrera-chen.com,Robbins-Schneider,not yahoo,herrera-chen.com,True
7,Jessica Morris,+1-004-728-6867x11046,philipwhite@boyer.com,Osborne Inc,not yahoo,boyer.com,True
8,Jacqueline Gomez,001-079-697-5686x7415,katherineking@curry-jackson.biz,"Brewer, Reyes and Scott",not yahoo,curry-jackson.biz,False
10,Jason Scott,001-595-873-0924x775,carolphillips@jimenez-casey.org,"Dillon, Black and Kim",not yahoo,jimenez-casey.org,False
...,...,...,...,...,...,...,...
493,Carl Perkins,001-648-408-7575x691,psmith@taylor.com,Johnson PLC,not yahoo,taylor.com,True
494,Mark Davis,+1-885-387-2753x612,christypierce@jefferson.biz,Fletcher Group,not yahoo,jefferson.biz,False
496,Lauren Fuentes,791.795.5069,sjames@mcdowell-boone.org,Smith Inc,not yahoo,mcdowell-boone.org,False
497,Henry Daugherty,001-528-633-1553x85759,sylviafox@kim.com,"Leon, Clark and Glenn",not yahoo,kim.com,True
