In [None]:
import pandas as pd

In [None]:
# Load the Auto MPG file: fields are separated by runs of whitespace and there is no header row.
# The separator is a raw string so that `\s` reaches pandas as a regex escape
# instead of being treated as an (invalid) Python string escape.
df2 = pd.read_csv(
    'https://archive.ics.uci.edu/ml/machine-learning-databases/auto-mpg/auto-mpg.data-original',
    sep=r'\s+',
    header=None,
)

In [None]:
df2.head(10)

In [None]:
df2.shape

### Selecting a range of rows and columns of the original dataset

In [None]:
# Re-read the same file, keeping only columns 4 and 5 and at most 100 rows.
# NOTE(review): with header=None, row 0 is a data row and is kept while rows 1-106
# are skipped - confirm whether the skip was meant to start at row 0.
df3 = pd.read_csv(
    'https://archive.ics.uci.edu/ml/machine-learning-databases/auto-mpg/auto-mpg.data-original',
    sep=r'\s+',  # raw string: `\s` is a regex escape, not a string escape
    header=None,
    usecols=[4, 5],
    skiprows=range(1, 107),
    nrows=100,
)

In [None]:
df3.head(10)

In [None]:
df3.shape

In [None]:
df2.head()

In [None]:
df2.columns = ['mpg','cyl','disp','hp','wt','acc','model','origin','cars']

In [None]:
df2.head()

In [None]:
df2.info()

## Aggregate

In [None]:
df2.describe()

In [None]:
df2.mpg.sum()

In [None]:
df2.mpg.count()

In [None]:
# Sum only the numeric columns per cylinder count; without numeric_only the
# string `cars` column would be concatenated within each group.
df2.groupby('cyl').sum(numeric_only=True)

In [None]:
df2.groupby('cyl').mpg.mean()

In [None]:
df2.groupby(['cyl'])[['mpg', 'disp']].mean()

In [None]:
# Column means for numeric columns only; the string `cars` column cannot be averaged
# and makes a plain .mean() raise on pandas >= 2.0.
df2.mean(numeric_only=True)

In [None]:
# Write next to the notebook (relative path) so the cell runs on any machine,
# not only where one user's absolute Windows directory exists.
df2.to_excel("mpg-cars.xlsx", index=False)

In [None]:
?df2.to_excel

## Frequency Analysis

In [None]:
pd.crosstab(df2.origin, df2.cyl)

## Pivot

In [None]:
# Average every numeric column per cylinder count. The string `cars` column is
# dropped first because 'mean' is undefined for it and raises on pandas >= 2.0.
df2.select_dtypes(include='number').pivot_table(index='cyl', aggfunc='mean')

In [None]:
# Total of every numeric column per cylinder count; the string `cars` column is
# excluded so its values are not concatenated into the result.
df2.select_dtypes(include='number').pivot_table(index='cyl', aggfunc='sum')

In [None]:
# Numeric totals per (cylinder count, origin) pair; the string `cars` column is
# excluded so its values are not concatenated into the result.
df2.select_dtypes(include='number').pivot_table(index='cyl', columns='origin', aggfunc='sum')

In [None]:
df2.pivot_table(index='cyl', columns='origin', values='mpg', aggfunc='sum')

In [None]:
df2.pivot_table(index='cyl', columns='origin', values='mpg', aggfunc='count')

## Slice, Dice and Splice

In [None]:
df2[:3] # Slice from beginning

In [None]:
df2[400:] # Slice till end

In [None]:
df2[5:10] # Dice from 5 to 10

In [None]:
df2[::100] # Splicing every 100

In [None]:
df2[3:330:100] # Splicing and dicing together

### Sorting in Pandas

In [None]:
# Chained sorts only keep the first ordering (heaviest first) among equal-mpg rows
# when the second sort is stable; the default quicksort gives no such guarantee.
df2.sort_values('wt', ascending=False).sort_values('mpg', kind='mergesort').head()

In [None]:
df2.sort_values(['mpg', 'wt'])

In [None]:
df2.sort_values(['mpg', 'wt'], ascending=[True, False]).head()

## Checking Pandas Version

In [None]:
pd.show_versions()

In [None]:
pd.__version__

In [None]:
sdf1 = df2[:3]
sdf2 = df2[6:10]
sdf3 = df2[400:]
sdf4 = df2[200:300:20]

In [None]:
sdf1

In [None]:
sdf2

In [None]:
sdf3

In [None]:
sdf4

## Combining DataFrames using Pandas

In [None]:
# Stack the four row slices on top of each other. All of them are slices of df2,
# so their column labels already match and no relabelling is needed.
pd.concat([sdf1, sdf2, sdf3, sdf4])

In [None]:
# First demo frame: five people identified by string subject_id values '1'-'5'.
raw_data = dict(
    subject_id=['1', '2', '3', '4', '5'],
    first_name=['Alex', 'Amy', 'Allen', 'Alice', 'Ayoung'],
    last_name=['Anderson', 'Ackerman', 'Ali', 'Aoni', 'Atiches'],
)
df_a = pd.DataFrame(raw_data, columns=list(raw_data))
df_a

In [None]:
# Second demo frame: five people identified by string subject_id values '4'-'8'.
raw_data = dict(
    subject_id=['4', '5', '6', '7', '8'],
    first_name=['Billy', 'Brian', 'Bran', 'Bryce', 'Betty'],
    last_name=['Bonder', 'Black', 'Balwner', 'Brice', 'Btisan'],
)
df_b = pd.DataFrame(raw_data, columns=list(raw_data))
df_b

In [None]:
# Third demo frame: one integer test_id per string subject_id.
raw_data = dict(
    subject_id=['1', '2', '3', '4', '5', '7', '8', '9', '10', '11'],
    test_id=[51, 15, 15, 61, 16, 14, 15, 1, 61, 16],
)
df_n = pd.DataFrame(raw_data, columns=list(raw_data))
df_n

In [None]:
df_n.info()

## Join the two dataframes along rows

In [None]:
df_new = pd.concat([df_a, df_b])
df_new

## Join the two dataframes along columns

In [None]:
pd.concat([df_a, df_b], axis=1)

### Merge two dataframes along the subject_id value

In [None]:
pd.merge(df_new, df_n, on='subject_id')

### Merge with outer join

In [None]:
df_a

In [None]:
df_b

In [None]:
pd.merge(df_a, df_b, on='subject_id', how='outer')

### Merge with inner join

In [None]:
pd.merge(df_a, df_b, on='subject_id', how='inner')

### Merge with right join

In [None]:
pd.merge(df_a, df_b, on='subject_id', how='right')

### Merge with left join

In [None]:
pd.merge(df_a, df_b, on='subject_id', how='left')

### Merge while adding a suffix to duplicate column names

In [None]:
pd.merge(df_a, df_b, on='subject_id', how='left', suffixes=('_left', '_right'))

## Quiz

In [None]:
registry = pd.DataFrame({
    's.no.': [1, 2, 3, 4, 6],
    'name': ['Student1', 'Student2', 'Student3', 'Student4', 'VeryNewStudent']
})
registry

In [None]:
blood_bank = pd.DataFrame({
    'student_name': ['Student3', 'Student4', 'Student1', 'Student2', 'NewStudent'],
    'blood_type': ['A', 'B', 'AB', 'O', 'P']
})
blood_bank

In [None]:
registry.merge(blood_bank, left_on=['name'], right_on=['student_name'], how='inner')

In [None]:
registry.merge(blood_bank, left_on=['name'], right_on=['student_name'], how='left')

In [None]:
registry.merge(blood_bank, left_on=['name'], right_on=['student_name'], how='right')

In [None]:
registry.merge(blood_bank, left_on=['name'], right_on=['student_name'], how='outer')

### User Defined function 

In [None]:
def f(x):
    """Return ``x`` multiplied by itself."""
    squared = x * x
    return squared

In [None]:
type(f)

In [None]:
f(9)

### Another method to define a function - Python lambda

In [None]:
# lambda arguments: expression

# A lambda can accept zero or more arguments, but its body must be a single expression (statements are not allowed).
# The return value of the lambda is whatever that expression evaluates to.
# Lambdas are most useful for short, one-off (anonymous) functions.

In [None]:
# The same squaring function as the `def` version, written as a lambda bound to a name.
f = lambda x: x * x  # noqa: E731 - assigning the lambda is the point of the demo
print(type(f))

In [None]:
f(9)

In [None]:
mul = lambda x, y: x * y
mul(5, 2)

In [None]:
f1 = lambda a,b: a if (a > b) else b

In [None]:
f1(2,3)

### Using lambdas with map

In [None]:
# map(func, seq) is a built-in that applies func to every element of seq.
# It returns an iterator, so list() is used here to collect the results.

L = [1, 2, 3, 4]

sqr = list(map(lambda value: value ** 2, L))
sqr

In [None]:
def multiply2(x):
    """Return ``x`` multiplied by 2."""
    doubled = x * 2
    return doubled


# A named function can be passed to map() just like a lambda.
abc = list(map(multiply2, (1, 2, 3, 4)))
abc

In [None]:
list(map(max, [1,2,3,4], [0,10,0,10]))

In [None]:
# map() accepts several iterables; the function receives one element from each per call.

a = [1, 2, 3, 4]
b = [17, 12, 11, 10]
c = [-1, -4, 5, 9]
opt = list(map(lambda first, second, third: first + second + third, a, b, c))
opt

In [None]:
opt2= list(map(lambda x, y : x+y, a, b))
opt2

In [None]:
dict_a = [{'name': 'python', 'points': 10}, {'name': 'java', 'points': 8}]

In [None]:
# map() returns an iterator, we have used the list() function to produce the result at once

val = list(map(lambda x : x['name'], dict_a)) # Output
val

In [None]:
val1 = list(map(lambda x : x['points']*10,  dict_a)) # Output:
val1

In [None]:
val2 = list(map(lambda x : x['name'] == "python", dict_a)) # Output:
val2

In [None]:
# filter(func, seq) keeps only the elements of seq for which func returns a truthy value.
# Like map(), it returns an iterator, so list() is used to collect the results.

fibonacci = [0, 1, 1, 2, 3, 5, 8, 13, 21, 34, 55]
odd_numbers = list(filter(lambda n: n % 2 != 0, fibonacci))
print(odd_numbers)

In [None]:
even_numbers = list(filter(lambda x: x % 2 == 0, fibonacci))
print(even_numbers)

## For Loop

In [None]:
# for loop: visit each element of the list in order.
computer_brands = ["Apple", "Asus", "Dell", "Samsung"]
for brand in computer_brands:
    # print is a function in Python 3; the statement form `print x` is a SyntaxError.
    print(brand)

### Reassign 'hello' in this nested list to say 'goodbye' instead:
    
list3 = [1,2,[3,4,'hello']]

In [None]:
list3 = [1, 2, [3, 4, 'hello']]

# Replace 'hello' with 'goodbye' wherever it appears: at the top level of list3
# or inside a list nested one level down.
for i, item in enumerate(list3):
    if isinstance(item, list):
        for j, inner in enumerate(item):
            if inner == 'hello':
                item[j] = 'goodbye'  # mutates the nested list in place
    elif item == 'hello':
        list3[i] = 'goodbye'
print(list3)

In [None]:
list3 = [1,2,[3,4,'hello']]
list3[2][2]='goodbye'
list3

In [None]:
# while loop: keep repeating the body for as long as the condition holds.

computer_brands = ["Apple", "Asus", "Dell", "Samsung"]
i = 0
while i < len(computer_brands):
    print(computer_brands[i])
    i += 1  # advance the index so the loop eventually stops

In [None]:
def square(x):
    """Return a zero-argument function that yields ``x * x`` when called."""
    return lambda: x * x


listOfLambdas = [square(i) for i in [1, 2, 3, 4, 5]]
# The loop variable is deliberately not `f`: reusing that name would overwrite the
# squaring function `f` defined earlier in the notebook.
for fn in listOfLambdas:
    print(fn())

In [None]:
abc=list(map(lambda x: x*x, [1, 2, 3, 4, 5]))
abc

In [None]:
# Map number words to their integer values ('three' was misspelled as 'thee').
a_dict = {'one': 1, 'two': 2, 'three': 3, 'four': 4}

In [None]:
a_dict.keys()

In [None]:
a_dict.items()

In [None]:
a_dict.values()

In [None]:
new_dict = {value: key for key, value in a_dict.items()}

In [None]:
new_dict