# Python Pandas

In [None]:
# import libraries
import pandas as pd

# load data
# This code is made to load our data stored on Google Drive
def gd_path(file_id):
    """Generate a shareable link from Google Drive file id."""
    return f"https://drive.google.com/uc?export=download&id={file_id}"

# Google Drive file ids
files_id = {
    "titleauthor": "1F1JOiYXStWacOBca6coNVfyVtoST7ZgD",
    "titles": "1PLdn50N9GRa53ZbuVWo0l47F_IXdvlEm",
    "stores": "1f-GCgip7O93CpbAkYvOsc21eKnSOSHsQ", 
    "sales": "1fzFc9rwYmVIPaGOFmhLVxCi3kg19vNU2", 
    "roysched": "1zPRZPoFPEMKyrNR5VSENeYFHGCBZmxbs", 
    "publishers": "1s9E8_AVOziTrowb3wyh2jg3PV763VOyq",
    "pub_info": "1OEgogcGKy--EpuVj0kqq7lyBZNGW6YSv", 
    "jobs": "1V1Za8hUdXD-vJOyRdX4aQV5wanIff2eM", 
    "employee": "1h9mUjsVqpP74b1w0x7KOw37n_n9Ulkt5", 
    "discounts": "111dvSxMcCsTgOuV1wDSKFJxO1Xcxd9VS", 
    "authors": "1fEF89Nhe61EebAljKlwFwfEuokK0o6aJ"
}

# Read data from Google Drive
sales = pd.read_csv(gd_path(files_id["sales"]), sep=";")
authors = pd.read_csv(gd_path(files_id["authors"]), sep=";")
titleauthor = pd.read_csv(gd_path(files_id["titleauthor"]), sep=";")
publishers = pd.read_csv(gd_path(files_id["publishers"]), sep=";")
jobs = pd.read_csv(gd_path(files_id["jobs"]), sep=";")
stores = pd.read_csv(gd_path(files_id["stores"]), sep=";")
titles =  pd.read_csv(gd_path(files_id["titles"]), sep=";")
employee = pd.read_csv(gd_path(files_id["employee"]), sep=";")

## **select**
##Select the title, price and year-to-date sales of all titles

In [None]:
titles[['title','price','ytd_sales']].head()

Unnamed: 0,title,price,ytd_sales
0,The Busy Executive's Database Guide,19.99,4095
1,Cooking with Computers: Surreptitious Balance ...,11.95,3876
2,You Can Combat Computer Stress!,2.99,18722
3,Straight Talk About Computers,19.99,4095
4,Silicon Valley Gastronomic Treats,19.99,2032


The syntax to select everything from a table in SQL is:

```sql
SELECT 
    title, price, ytd_sales
FROM
    titles;
```

## .rename
## Select the job id and job description of all jobs, but display the columns with the names "id" and "description"

In [None]:
jobs.rename(columns = {'job_id':'id', 'job_desc':'description'}, inplace = True)

jobs[["id","description"]].head()


Unnamed: 0,id,description
0,1,New Hire - Job not specified
1,2,Chief Executive Officer
2,3,Business Operations Manager
3,4,Chief Financial Officier
4,5,Publisher


```sql
SELECT
	job_id AS id,
	job_desc AS description
FROM
	jobs;
```

You can rename columns in pandas with the `rename` method, here is the link to the [documentation](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.rename.html).

## .unique
## Select all unique title id's that were involved in a sale

In [None]:
sales['title_id'].unique()

array(['BU1032', 'PS2091', 'PC8888', 'TC3218', 'TC4203', 'TC7777',
       'MC3021', 'PS1372', 'PS2106', 'PS3333', 'PS7777', 'BU7832',
       'MC2222', 'BU2075', 'BU1111', 'PC1035'], dtype=object)

```sql
SELECT
	DISTINCT title_id
FROM
	sales;
```

In [None]:
authors["state"].unique()

array(['CA', 'KS', 'TN', 'OR', 'MI', 'IN', 'MD', 'UT'], dtype=object)

## .nunique

In [None]:
authors["state"].nunique()

8

 Find the total amount of authors for each state

In [None]:
authors.groupby('state').au_id.nunique()

state
CA    15
IN     1
KS     1
MD     1
MI     1
OR     1
TN     1
UT     2
Name: au_id, dtype: int64

## **where**
## Select first and last name from authors who have the last name "Ringer"

In [None]:
authors[["au_fname", "au_lname"]] [authors["au_lname"] == "Ringer"]

Unnamed: 0,au_fname,au_lname
21,Anne,Ringer
22,Albert,Ringer



In SQL the syntax is:

```sql
SELECT au_fname, au_lname
FROM authors
WHERE au_lname="Ringer";
```

5. Select the order number and order date of all sales where *exactly* 20 books where sold.

In [None]:
sales[['ord_num','ord_date','qty']] [sales['qty'] == 20]

Unnamed: 0,ord_num,ord_date,qty
6,P2121,1992-06-15 00:00:00,20
7,P2121,1992-06-15 00:00:00,20
8,N914008,1994-09-14 00:00:00,20
10,P3087a,1993-05-29 00:00:00,20


In SQL the syntax is: 

```sql
SELECT ord_num, ord_date, qty 
FROM sales 
WHER'E qty = 20;
```

## **and** &
## Select first and last name from authors whose last name is "Ringer" and first name is "Anne"

In [None]:
authors[["au_fname", "au_lname"]] [(authors["au_lname"] == "Ringer") & (authors["au_fname"] == "Anne")]


Unnamed: 0,au_fname,au_lname
21,Anne,Ringer


In SQL the syntax is:

```sql
SELECT au_fname, au_lname
FROM authors
WHERE au_fname = "Anne"
AND au_lname = "Ringer";
```

Select all the order numbers with a quantity sold between 25 and 45 from the table sales

In [None]:
sales[['ord_num', 'qty']][ (sales['qty']>25) & (sales['qty']<45) ]

Unnamed: 0,ord_num,qty
5,P2121,40
16,X999,35
20,QA879.1,30


Select all the sales between 1993-03-11 and 1994-09-13

In [None]:
sales[["ord_num", "qty", "ord_date"]] [ (sales["ord_date"] <= '1994-09-13') & (sales["ord_date"] >= '1993-03-11') ].head()

Unnamed: 0,ord_num,qty,ord_date
2,A2976,50,1993-05-24 00:00:00
10,P3087a,20,1993-05-29 00:00:00
11,P3087a,25,1993-05-29 00:00:00
12,P3087a,15,1993-05-29 00:00:00
13,P3087a,25,1993-05-29 00:00:00


In [None]:
e = employee
t= titles

df = t.merge(e, on="pub_id", how="inner")

df[["title", "fname", "lname"]] [ (df["fname"] =="Howard") & (df["lname"] == "Snyder")].head()

Unnamed: 0,title,fname,lname
61,You Can Combat Computer Stress!,Howard,Snyder
71,Is Anger the Enemy?,Howard,Snyder
81,Life Without Fear,Howard,Snyder
91,Prolonged Data Deprivation: Four Case Studies,Howard,Snyder
101,Emotional Security: A New Algorithm,Howard,Snyder


## **or** |
## Select first name, last name and city from authors whose first name is "Dean" and whose city is either "Oakland" or "Berkeley"

In [None]:
authors[["au_fname", "au_lname", "city"]] [ (authors["au_fname"] == "Dean") & (authors["city"] == "Oakland") | (authors["city"] == "Berkeley") ]

Unnamed: 0,au_fname,au_lname,city
2,Cheryl,Carson,Berkeley
4,Dean,Straight,Oakland
6,Abraham,Bennet,Berkeley


In SQL the syntax is:

```sql
SELECT au_fname,au_lname,city
FROM authors
WHERE au_fname ="Dean" 
AND (city ="Oakland" OR city = "Berkeley");
```


## .isin( [ ] )
## Select the name, city and country of publishers, based in the USA

In [None]:
publishers[['pub_name','city','country']][publishers['country'].isin(["USA"])]


Unnamed: 0,pub_name,city,country
0,New Moon Books,Boston,USA
1,Binnet & Hardley,Washington,USA
2,Algodata Infosystems,Berkeley,USA
3,Five Lakes Publishing,Chicago,USA
4,Ramona Publishers,Dallas,USA
6,Scootney Books,New York,USA


The SQL syntax is:
```sql
SELECT pub_name, city,country
FROM publishers
WHERE NOT country="USA";
```

Using .isin(), select all titles of type "psychology", "mod_cook" and "trad_cook"

In [None]:
titles[["title","type"]] [titles["type"].isin(["psychology", "mod_cook", "trad_cook"])]

Unnamed: 0,title,type
4,Silicon Valley Gastronomic Treats,mod_cook
5,The Gourmet Microwave,mod_cook
10,Computer Phobic AND Non-Phobic Individuals: Be...,psychology
11,Is Anger the Enemy?,psychology
12,Life Without Fear,psychology
13,Prolonged Data Deprivation: Four Case Studies,psychology
14,Emotional Security: A New Algorithm,psychology
15,"Onions, Leeks, and Garlic: Cooking Secrets of ...",trad_cook
16,Fifty Years in Buckingham Palace Kitchens,trad_cook
17,"Sushi, Anyone?",trad_cook


## ~ .isin( [ ] )
## Select the name, city and country of publishers, not based in the USA

In [None]:
publishers[['pub_name','city','country']][~publishers['country'].isin(["USA"])]


Unnamed: 0,pub_name,city,country
5,GGG&G,Mnchen,Germany
7,Lucerne Publishing,Paris,France


The SQL syntax is:
```sql
SELECT pub_name, city,country
FROM publishers
WHERE NOT country="USA";
```

Select first name, last name and city from authors whose city is "Oakland" or "Berkeley", and last name is **not** "Straight"

In [None]:
authors[["au_fname", "au_lname","city"]] [ (authors["city"] == "Oakland") | (authors["city"] == "Berkeley") & (~publishers['country'].isin(["USA"])) ]

Unnamed: 0,au_fname,au_lname,city
1,Marjorie,Green,Oakland
4,Dean,Straight,Oakland
15,Dirk,Stringer,Oakland
16,Stearns,MacFeather,Oakland
17,Livia,Karsen,Oakland


In SQL the syntax is:
```sql
SELECT au_fname, au_lname, city 
FROM authors
WHERE (city = "Oakland" OR city = "Berkeley") 
AND NOT au_lname = "Straight";
```

 Select all the authors from the authors table that do not come from the cities Salt Lake City, Ann Arbor, and Oakland.

In [None]:
authors[["au_lname","au_fname"]] [~authors["city"].isin(["Salt Lake City", "Ann Arbor", "Oakland"])]

Unnamed: 0,au_lname,au_fname
0,White,Johnson
2,Carson,Cheryl
3,O'Leary,Michael
5,Smith,Meander
6,Bennet,Abraham
7,Dull,Ann
8,Gringlesby,Burt
9,Locksley,Charlene
10,Greene,Morningstar
11,Blotchet-Halls,Reginald


## sort_values
## Select the name, city and country of publishers, not based in the USA

In [None]:
sales[["ord_num","qty"]].sort_values(by="qty", ascending=False).head(1)

Unnamed: 0,ord_num,qty
3,QA7442.3,75


## max() - min()
## Select the name, city and country of publishers, not based in the USA

In [None]:
sales["qty"].max()

75

In [None]:
titles["price"].min()

0.0

In [None]:
titles["price"][titles['type'] == "psychology"].max()

21.59

 What's the price of the most expensive title from each publisher?

In [None]:
titles.groupby(["pub_id"]).price.max().sort_values(ascending = False)

pub_id
1389    22.95
877     21.59
736     19.99
Name: price, dtype: float64

## len()
## How many rows are there in the table authors?

In [None]:
len(authors)

23

## sum() - count() - mean()
## What's the total amount of year-to-date sales?

In [None]:
titles['ytd_sales'].sum()

97446

In [None]:
titles['title_id'].count()

18

In [None]:
sales['qty'].mean()

23.476190476190474

In [None]:
titles["price"] [titles["type"]=="psychology"].mean()

13.504

In [None]:
authors[["au_id"]][(authors["city"] == "San Jose")| (authors["city"] =="Salt Lake City")].count()

au_id    3
dtype: int64

For each type (business, psychology…), find out how many books each publisher has

In [None]:
titles.groupby(["pub_id", "type"]).title_id.count()

pub_id  type        
736     business        1
        psychology      4
877     UNDECIDED       1
        mod_cook        2
        psychology      1
        trad_cook       3
1389    business        3
        popular_comp    3
Name: title_id, dtype: int64

In [None]:
titles.groupby(["pub_id", "type"]) .agg({"title_id" : "count"})

Unnamed: 0_level_0,Unnamed: 1_level_0,title_id
pub_id,type,Unnamed: 2_level_1
736,business,1
736,psychology,4
877,UNDECIDED,1
877,mod_cook,2
877,psychology,1
877,trad_cook,3
1389,business,3
1389,popular_comp,3


## agg( )
## What's the total amount of year-to-date sales?

In [None]:
sales[['qty']].agg(['count','mean','sum'])

Unnamed: 0,qty
count,21.0
mean,23.47619
sum,493.0


In [None]:
sales['qty'].agg({"qty" : "sum"})

qty    493
Name: qty, dtype: int64

In [None]:
sales["qty"].sum()

493

## contains( )
##  Select all books from the table titles that contain the word "cooking" in its title

In [None]:
titles[titles["title"].str.contains('cooking', case = False)]

Unnamed: 0,title_id,title,type,pub_id,price,advance,royalty,ytd_sales,notes,pubdate
1,BU1111,Cooking with Computers: Surreptitious Balance ...,business,1389,11.95,5000.0,10,3876,Helpful hints on how to use your electronic re...,1991-06-09 00:00:00
6,MC3026,The Psychology of Computer Cooking,UNDECIDED,877,0.0,0.0,0,0,,2014-11-07 10:39:37
15,TC3218,"Onions, Leeks, and Garlic: Cooking Secrets of ...",trad_cook,877,20.95,7000.0,10,375,"Profusely illustrated in color, this makes a w...",1991-10-21 00:00:00


Select all titles that start with the word "The"

In [None]:
titles[titles["title"].str.contains('^The', case = False)]

Unnamed: 0,title_id,title,type,pub_id,price,advance,royalty,ytd_sales,notes,pubdate
0,BU1032,The Busy Executive's Database Guide,business,1389,19.99,5000.0,10,4095,An overview of available database systems with...,1991-06-12 00:00:00
5,MC3021,The Gourmet Microwave,mod_cook,877,2.99,15000.0,24,22246,Traditional French gourmet recipes adapted for...,1991-06-18 00:00:00
6,MC3026,The Psychology of Computer Cooking,UNDECIDED,877,0.0,0.0,0,0,,2014-11-07 10:39:37


Select the full names (first and last name) of authors whose last name starts with "S"

In [None]:
authors[authors[["au_fname", "au_lname"]]["au_lname"].str.contains('^S', regex = True)]

Unnamed: 0,au_id,au_lname,au_fname,phone,address,city,state,zip,contract
4,274-80-9391,Straight,Dean,415 834-2919,5420 College Av.,Oakland,CA,94609,1
5,341-22-1782,Smith,Meander,913 843-0462,10 Mississippi Dr.,Lawrence,KS,66044,0
15,724-08-9931,Stringer,Dirk,415 843-2991,5420 Telegraph Av.,Oakland,CA,94609,0


Find a pattern that reveals whether an employee is Female or Male. Select all female employees.

In [None]:
employee[['emp_id','fname']][employee['emp_id'].str.contains('F$')].head()

Unnamed: 0,emp_id,fname
0,A-C71970F,Aria
1,A-R89858F,Annette
2,AMD15433F,Ann
3,ARD36773F,Anabela
5,CGS88322F,Carine


In [None]:
employee[['emp_id','fname']].loc[employee['emp_id'].str.contains('F$', regex=True)].head()

Unnamed: 0,emp_id,fname
0,A-C71970F,Aria
1,A-R89858F,Annette
2,AMD15433F,Ann
3,ARD36773F,Anabela
5,CGS88322F,Carine


Select the first and last names of all male employees whose name starts with "P".

In [None]:
(
employee
    .loc[employee['emp_id'].str.contains('M$', regex=True)
        &(employee['fname'].str.contains('^P', regex=True)
        |employee['lname'].str.contains('^P', regex=True))]
    [['fname', 'lname']]
)

Unnamed: 0,fname,lname
20,Manuel,Pereira
22,Miguel,Paolino
26,Maria,Pontes
29,Palle,Ibsen
30,Peter,Franken
31,Paolo,Accorti
32,Pirkko,Koskitalo
33,Pedro,Afonso
35,Philip,Cramer
36,Paul,Henriot


Select the name and address of all stores located in an Avenue (its address ends with "Ave.")

In [None]:
stores[["stor_name","stor_address"]][stores["stor_address"].str.contains("Ave.$", regex = True)]

Unnamed: 0,stor_name,stor_address
0,Eric the Read Books,788 Catamaugus Ave.
1,Barnum's,567 Pasadena Ave.


Select all books that have an "ing" in the title, with at least 4 other characters preceding it.
For example, 'cooking' has 4 characters before the 'ing', so this should be included; 'sewing' has only 3 characters before the 'ing', so this shouldn't be included.

In [None]:
titles[titles["title"].str.contains("....ing", case=True, regex = True)]

Unnamed: 0,title_id,title,type,pub_id,price,advance,royalty,ytd_sales,notes,pubdate
1,BU1111,Cooking with Computers: Surreptitious Balance ...,business,1389,11.95,5000.0,10,3876,Helpful hints on how to use your electronic re...,1991-06-09 00:00:00
6,MC3026,The Psychology of Computer Cooking,UNDECIDED,877,0.0,0.0,0,0,,2014-11-07 10:39:37
15,TC3218,"Onions, Leeks, and Garlic: Cooking Secrets of ...",trad_cook,877,20.95,7000.0,10,375,"Profusely illustrated in color, this makes a w...",1991-10-21 00:00:00
16,TC4203,Fifty Years in Buckingham Palace Kitchens,trad_cook,877,11.95,4000.0,14,15096,More anecdotes from the Queen's favorite cook ...,1991-06-12 00:00:00


How many authors have an "i" in their first name, are from Utah, Maryland, or Kansas?

In [None]:
authors[["au_id"]] [ (authors["au_fname"].str.contains("i", case = False)) & (authors["state"].isin(['CA' , 'MD', 'KS'])) ].count()

au_id    6
dtype: int64

## startswith( )
## Select all books from the table titles that contain the word "cooking" in its title

In [None]:
titles[titles["title"].str.startswith('The')]

Unnamed: 0,title_id,title,type,pub_id,price,advance,royalty,ytd_sales,notes,pubdate
0,BU1032,The Busy Executive's Database Guide,business,1389,19.99,5000.0,10,4095,An overview of available database systems with...,1991-06-12 00:00:00
5,MC3021,The Gourmet Microwave,mod_cook,877,2.99,15000.0,24,22246,Traditional French gourmet recipes adapted for...,1991-06-18 00:00:00
6,MC3026,The Psychology of Computer Cooking,UNDECIDED,877,0.0,0.0,0,0,,2014-11-07 10:39:37


Select the full names (first and last name) of authors whose last name starts with "S"

In [None]:
authors[authors[["au_fname", "au_lname"]]["au_lname"].str.startswith('S')]

Unnamed: 0,au_id,au_lname,au_fname,phone,address,city,state,zip,contract
4,274-80-9391,Straight,Dean,415 834-2919,5420 College Av.,Oakland,CA,94609,1
5,341-22-1782,Smith,Meander,913 843-0462,10 Mississippi Dr.,Lawrence,KS,66044,0
15,724-08-9931,Stringer,Dirk,415 843-2991,5420 Telegraph Av.,Oakland,CA,94609,0


## endswith( )

In [None]:
stores[["stor_name","stor_address"]][stores["stor_address"].str.endswith(("Ave.","St."))]

Unnamed: 0,stor_name,stor_address
0,Eric the Read Books,788 Catamaugus Ave.
1,Barnum's,567 Pasadena Ave.
2,News & Brews,577 First St.
4,Fricative Bookshop,89 Madison St.
5,Bookbeat,679 Carson St.


## between( )

In [None]:
sales.loc[sales['ord_date'] .between(left='1993-03-11', right ='1994-09-13', inclusive = 'both') ] .sort_values(by="qty").head(5)


Unnamed: 0,stor_id,ord_num,ord_date,qty,payterms,title_id
15,7896,TQ456,1993-12-12 00:00:00,10,Net 60,MC2222
12,7131,P3087a,1993-05-29 00:00:00,15,Net 60,PS3333
14,7896,QQ2299,1993-10-28 00:00:00,15,Net 60,BU7832
10,7131,P3087a,1993-05-29 00:00:00,20,Net 60,PS1372
11,7131,P3087a,1993-05-29 00:00:00,25,Net 60,PS2106


## joins( )

Select the order number, quantity and book title for all sales.

In [None]:
s= sales
t= titles
df = pd.merge(s, t, on="title_id", how="left")

df[["ord_num", "qty", "title"]].head()

Unnamed: 0,ord_num,qty,title
0,6871,5,The Busy Executive's Database Guide
1,722a,3,Is Anger the Enemy?
2,A2976,50,Secrets of Silicon Valley
3,QA7442.3,75,Is Anger the Enemy?
4,D4482,10,Is Anger the Enemy?


Select the full name, job description and publisher name of all employees
Hint: you will have to perform 2 joins in a single query to merge 3 tables together.

In [None]:
jobs.head()

Unnamed: 0,id,description,min_lvl,max_lvl
0,1,New Hire - Job not specified,10,10
1,2,Chief Executive Officer,200,250
2,3,Business Operations Manager,175,225
3,4,Chief Financial Officier,175,250
4,5,Publisher,150,250


**3 table's joins**

In [None]:
p= publishers
e= employee
j = jobs
df = pd.merge(e, p, on="pub_id", how="left").merge(j, left_on="job_id", right_on="id",how="left")

df[["fname", "lname","pub_name"]].head()

Unnamed: 0,fname,lname,pub_name
0,Aria,Cruz,Algodata Infosystems
1,Annette,Roulet,Lucerne Publishing
2,Ann,Devon,Scootney Books
3,Anabela,Domingues,Binnet & Hardley
4,Carlos,Hernadez,Lucerne Publishing


## case

In [None]:
import numpy as np

Select everything from the sales table and create a new column called "sales_category" to categorise qty:

In [None]:
s = sales


s["sales_category"] = np.where(s['qty'] >= 50, "high sales",
                      np.where(s['qty'] >= 20, "medium sales","low sales"))

s.head()

Unnamed: 0,stor_id,ord_num,ord_date,qty,payterms,title_id,sales_category
0,6380,6871,1994-09-14 00:00:00,5,Net 60,BU1032,low sales
1,6380,722a,1994-09-13 00:00:00,3,Net 60,PS2091,low sales
2,7066,A2976,1993-05-24 00:00:00,50,Net 30,PC8888,high sales
3,7066,QA7442.3,1994-09-13 00:00:00,75,ON invoice,PS2091,high sales
4,7067,D4482,1994-09-14 00:00:00,10,Net 60,PS2091,low sales


In [None]:
t = titles
p = publishers

t["price_category"] = np.where(t["price"] <= 5 , "super low",
                      np.where(t["price"] <= 10 , "low",
                      np.where(t["price"] <= 15 , "medium", "high")))

df = pd.merge(t, p, on="pub_id", how="left")

df.groupby(by = [ 'pub_name', 'type', 'price_category']).agg({"price" : "mean"}).round()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,price
pub_name,type,price_category,Unnamed: 3_level_1
Algodata Infosystems,business,high,20.0
Algodata Infosystems,business,medium,12.0
Algodata Infosystems,popular_comp,high,21.0
Algodata Infosystems,popular_comp,super low,0.0
Binnet & Hardley,UNDECIDED,super low,0.0
Binnet & Hardley,mod_cook,high,20.0
Binnet & Hardley,mod_cook,super low,3.0
Binnet & Hardley,psychology,high,22.0
Binnet & Hardley,trad_cook,high,21.0
Binnet & Hardley,trad_cook,medium,13.0


## group by

In [None]:
s.groupby(["sales_category"]).agg({"qty" : "sum"})

Unnamed: 0_level_0,qty
sales_category,Unnamed: 1_level_1
high sales,125
low sales,83
medium sales,285


In [None]:
s.groupby(["sales_category"]).agg({"qty" : "sum"}).sort_values(by="qty", ascending=False)

Unnamed: 0_level_0,qty
sales_category,Unnamed: 1_level_1
medium sales,285
high sales,125
low sales,83


In [None]:
s.head()

Unnamed: 0,stor_id,ord_num,ord_date,qty,payterms,title_id,sales_category
0,6380,6871,1994-09-14 00:00:00,5,Net 60,BU1032,low sales
1,6380,722a,1994-09-13 00:00:00,3,Net 60,PS2091,low sales
2,7066,A2976,1993-05-24 00:00:00,50,Net 30,PC8888,high sales
3,7066,QA7442.3,1994-09-13 00:00:00,75,ON invoice,PS2091,high sales
4,7067,D4482,1994-09-14 00:00:00,10,Net 60,PS2091,low sales


## str[ :2 ]

In [None]:
s["short_title_id"] = s["title_id"].str[:2]
s.head(5)

Unnamed: 0,stor_id,ord_num,ord_date,qty,payterms,title_id,sales_category,short_title_id
0,6380,6871,1994-09-14 00:00:00,5,Net 60,BU1032,low sales,BU
1,6380,722a,1994-09-13 00:00:00,3,Net 60,PS2091,low sales,PS
2,7066,A2976,1993-05-24 00:00:00,50,Net 30,PC8888,high sales,PC
3,7066,QA7442.3,1994-09-13 00:00:00,75,ON invoice,PS2091,high sales,PS
4,7067,D4482,1994-09-14 00:00:00,10,Net 60,PS2091,low sales,PS


In [None]:
s["short_title_id2"] = s["title_id"].str[2:]
s.head(5)

Unnamed: 0,stor_id,ord_num,ord_date,qty,payterms,title_id,sales_category,short_title_id,short_title_id2
0,6380,6871,1994-09-14 00:00:00,5,Net 60,BU1032,low sales,BU,1032
1,6380,722a,1994-09-13 00:00:00,3,Net 60,PS2091,low sales,PS,2091
2,7066,A2976,1993-05-24 00:00:00,50,Net 30,PC8888,high sales,PC,8888
3,7066,QA7442.3,1994-09-13 00:00:00,75,ON invoice,PS2091,high sales,PS,2091
4,7067,D4482,1994-09-14 00:00:00,10,Net 60,PS2091,low sales,PS,2091


## sample()

In [None]:
category_type_df.loc[category_type_df["type"] == "12175397", :].sample(10)

##value_counts()

In [None]:
product_category_df["category"].value_counts()

## str.lower() / case=False

In [None]:
dataframe["name"].str.contains("charger", case=False) , "charger",

In [None]:
product_category_df.loc[product_category_df["name"].str.lower().str.contains("cable"), "category"] += ", cable"

In [None]:
# how to see missing valuees rows

In [None]:
products_cl[products_cl['type'].isna()]