In [1]:
import pandas as pd
import numpy as np

### Bonus Information: .copy()
#### If the .copy() method is not used, plain assignment only creates a second name for the same DataFrame, so changing the "copied" version in place also changes the original. An example is below.

In [2]:
# Three names, two objects: a2 is an independent copy, a3 is just another
# name bound to the very same DataFrame as a1.
a1 = pd.DataFrame(dict(a=[1, 2], b=[3, 4]))
a2 = a1.copy(deep=True)  # deep=True is the default, spelled out here
a3 = a1                  # no copy: a3 and a1 are the same object

In [3]:
# Rename a column on the copy, in place. a2 was created with .copy(), so a1
# (displayed below) still shows the original column name 'b'.
a2.rename({'b': 'bb'}, axis='columns', inplace=True)
a1

Unnamed: 0,a,b
0,1,3
1,2,4


In [4]:
# Same in-place rename, but through a3. a3 was bound with plain assignment
# (a3 = a1), so it is the same object as a1 and the change shows up in a1.
a3.rename({'b': 'bb'}, axis='columns', inplace=True)
a1

Unnamed: 0,a,bb
0,1,3
1,2,4


### Create a dataframe

In [67]:
# Column-oriented input: each key becomes a column, each list holds its values.
cars = dict(
    Brand=['Honda Civic', 'Toyota Corolla', 'Ford Focus', 'Audi A4'],
    Price=[22000, 25000, 27000, 35000],
)

# `columns` pins the column order explicitly; for this dict,
# pd.DataFrame(cars) on its own builds the same frame.
df = pd.DataFrame(cars, columns=['Brand', 'Price'])
print(df)

            Brand  Price
0     Honda Civic  22000
1  Toyota Corolla  25000
2      Ford Focus  27000
3         Audi A4  35000


In [68]:
# Bare last expression: the notebook echoes the Index of column labels of df.
df.columns

Index(['Brand', 'Price'], dtype='object')

### Add columns

In [69]:
# Assigning a sequence to a new column label adds the column directly, one
# value per existing row. This replaces an earlier attempt that wrapped the
# values in a second DataFrame and used DataFrame.append, which appended rows
# rather than a column (and was removed in pandas 2.0).
dddd = [30000, 23000, 146000, 82340]
df['Milage'] = dddd

random = np.arange(1, 5)  # 1..4; the step defaults to 1
df['random'] = random

df

Unnamed: 0,Brand,Price,Milage,random
0,Honda Civic,22000,30000,1
1,Toyota Corolla,25000,23000,2
2,Ford Focus,27000,146000,3
3,Audi A4,35000,82340,4


### Drop unnecessary columns

In [71]:
# Remove the helper column. Rebinding `df` to the returned frame (instead of
# inplace=True) together with errors='ignore' keeps this cell safe to re-run:
# a second execution finds no 'random' column and leaves the data as it is
# rather than raising KeyError.
df = df.drop(columns='random', errors='ignore')
df

Unnamed: 0,Brand,Price,Milage
0,Honda Civic,22000,30000
1,Toyota Corolla,25000,23000
2,Ford Focus,27000,146000
3,Audi A4,35000,82340


In [72]:
# Column labels of df after the drop in the previous cell.
df.columns

Index(['Brand', 'Price', 'Milage'], dtype='object')

### Create another data frame (with a small difference in coding)

In [86]:
# Second frame, sharing the 'Brand' values with df. The dict literal is passed
# straight to the constructor instead of being named first.
df2 = pd.DataFrame(
    {
        'Brand': ['Honda Civic', 'Toyota Corolla', 'Ford Focus', 'Audi A4'],
        'Year': [2014, 2015, 2016, 2020],
        'Last Owner': ['Man', 'Man', 'Man', 'Woman'],
    }
)
df2

Unnamed: 0,Brand,Year,Last Owner
0,Honda Civic,2014,Man
1,Toyota Corolla,2015,Man
2,Ford Focus,2016,Man
3,Audi A4,2020,Woman


### Merge two data frames

In [87]:
# Join the two frames on the shared 'Brand' column. Naming the key explicitly
# keeps the join stable: with no `on`, merge uses every column name the two
# frames have in common, so a later shared column would silently change which
# rows match. how='inner' is the default, spelled out for the reader.
df3 = df.merge(df2, on='Brand', how='inner')
df3

Unnamed: 0,Brand,Price,Milage,Year,Last Owner
0,Honda Civic,22000,30000,2014,Man
1,Toyota Corolla,25000,23000,2015,Man
2,Ford Focus,27000,146000,2016,Man
3,Audi A4,35000,82340,2020,Woman


### Now, split car brand and model

In [88]:
# Split each 'Brand Model' string at the first space only (n=1): item 0 is the
# brand and item 1 is the whole remainder, so a multi-word model name stays in
# one piece instead of being cut into extra list items.
v_split = df3['Brand'].str.split(' ', n=1)
v_split.head()

0       [Honda, Civic]
1    [Toyota, Corolla]
2        [Ford, Focus]
3           [Audi, A4]
Name: Brand, dtype: object

In [89]:
# Pull the list positions out of v_split into their own columns:
# position 0 -> 'Brand2', position 1 -> 'Model'.
df3['Brand2'] = v_split.str.get(0)
df3['Model'] = v_split.str.get(1)
df3

Unnamed: 0,Brand,Price,Milage,Year,Last Owner,Brand2,Model
0,Honda Civic,22000,30000,2014,Man,Honda,Civic
1,Toyota Corolla,25000,23000,2015,Man,Toyota,Corolla
2,Ford Focus,27000,146000,2016,Man,Ford,Focus
3,Audi A4,35000,82340,2020,Woman,Audi,A4


##### Drop the redundant first column

In [90]:
# The combined 'Brand' column is redundant once it has been split. Rebinding
# `df3` (instead of inplace=True) with errors='ignore' makes the cell safe to
# re-run: if 'Brand' is already gone, nothing is dropped and no KeyError is
# raised.
df3 = df3.drop(columns='Brand', errors='ignore')
df3

Unnamed: 0,Price,Milage,Year,Last Owner,Brand2,Model
0,22000,30000,2014,Man,Honda,Civic
1,25000,23000,2015,Man,Toyota,Corolla
2,27000,146000,2016,Man,Ford,Focus
3,35000,82340,2020,Woman,Audi,A4


### There is a quicker way to do that too.

In [91]:
# Rebuild df3 from the two source frames, joining explicitly on 'Brand'.
df3 = df.merge(df2, on='Brand')
# expand=True returns the pieces as columns. n=1 splits at the first space
# only, so there are never more than two pieces; without it, a name with three
# or more words produces extra columns and the two-column assignment fails.
df3[['Brand-2', 'Model-2']] = df3['Brand'].str.split(' ', n=1, expand=True)
# Drop the now-redundant combined column without mutating in place.
df3 = df3.drop(columns='Brand')
df3

Unnamed: 0,Price,Milage,Year,Last Owner,Brand-2,Model-2
0,22000,30000,2014,Man,Honda,Civic
1,25000,23000,2015,Man,Toyota,Corolla
2,27000,146000,2016,Man,Ford,Focus
3,35000,82340,2020,Woman,Audi,A4


### Let's filter cars that are last owned by a man

In [100]:
# .loc[rows, columns]: the boolean mask keeps rows whose 'Last Owner' is 'Man';
# the list selects and orders the columns to show.
df3.loc[
    df3['Last Owner'].eq('Man'),
    ['Price', 'Milage', 'Year', 'Brand-2', 'Model-2', 'Last Owner'],
]

Unnamed: 0,Price,Milage,Year,Brand-2,Model-2,Last Owner
0,22000,30000,2014,Honda,Civic,Man
1,25000,23000,2015,Toyota,Corolla,Man
2,27000,146000,2016,Ford,Focus,Man


### Let's filter for 2015 and later model cars, last owned by a man

In [99]:
# Two conditions combined element-wise with &: owner is 'Man' AND year is
# 2015 or later. The column list selects and orders what is displayed.
df3.loc[
    df3['Last Owner'].eq('Man') & df3['Year'].ge(2015),
    ['Price', 'Milage', 'Year', 'Brand-2', 'Model-2', 'Last Owner'],
]

Unnamed: 0,Price,Milage,Year,Brand-2,Model-2,Last Owner
1,25000,23000,2015,Toyota,Corolla,Man
2,27000,146000,2016,Ford,Focus,Man


### Another quick merge example

In [107]:
# Small three-column frame used as the left side of the merge example.
dframe = pd.DataFrame(dict(a=[1, 2, 3], b=[5, 6, 7], c=[1, 5, 4]))
dframe

Unnamed: 0,a,b,c
0,1,5,1
1,2,6,5
2,3,7,4


In [117]:
# Right side of the merge: it shares only the column 'c' with dframe, with the
# same values in a different row order.
dframe2 = pd.DataFrame({
    "c": [4, 5, 1],
    "d": [1, 2, 3],
    "e": [1, 2, 3],
    "f": [3, 3, 4],
})
# Name the join key explicitly rather than relying on merge to pick up every
# column name the two frames have in common.
df4 = dframe.merge(dframe2, on="c")
df4

Unnamed: 0,a,b,c,d,e,f
0,1,5,1,3,3,4
1,2,6,5,2,2,3
2,3,7,4,1,1,3


In [124]:
# rename returns a new frame with the relabelled columns; the result is only
# displayed here, not assigned, so df4 itself keeps its lowercase labels.
df4.rename({"a": "A", "b": "B"}, axis="columns")

Unnamed: 0,A,B,c,d,e,f
0,1,5,1,3,3,4
1,2,6,5,2,2,3
2,3,7,4,1,1,3
