# Data Science with Python - More Topics #2
---

# Working with data with Pandas - Introduction

https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.set_option.html

In [1]:
import pandas as pd
# pd.set_option('display.max_rows', 10)
# pd.set_option('display.max_columns', 1000)

In [2]:
dataset = pd.read_csv('db.csv', sep = ';')

In [3]:
dataset

Unnamed: 0,Name,Engine,Year,Mileage,Zero_km,Accessories,Value
0,Jetta Variant,4.0 Turbo Engine,2003,44410.0,False,"['Alloy wheels', 'Power locks', 'Autopilot', '...",88078.64
1,Passat,Diesel Engine,1991,5712.0,False,"['Multimedia Center', 'Panoramic Roof', 'ABS B...",106161.94
2,Crossfox,V8 diesel engine,1990,37123.0,False,"['Autopilot', 'Stability control', 'Twilight s...",72832.16
3,DS5,Motor 2.4 Turbo,2019,,True,"['Power locks', '4 X 4', 'Power windows', 'Twi...",124549.07
4,Aston Martin DB4,2.4 Turbo Engine,2006,25757.0,False,"['Alloy wheels', '4 X 4', 'Multimedia center',...",92612.10
...,...,...,...,...,...,...,...
253,Phantom 2013,V8 engine,2014,27505.0,False,"['Stability control', 'Autopilot', 'Automatic ...",51759.58
254,Cadillac Ciel concept,Motor V8,1991,29981.0,False,"['Leather seats', 'Digital panel', 'Rain senso...",51667.06
255,GLK class,5.0 V8 Bi-Turbo engine,2002,52637.0,False,"['Alloy wheels', 'Traction control', 'Automati...",68934.03
256,Aston Martin DB5,Diesel Engine,1996,7685.0,False,"['Air conditioning', '4 X 4', 'Automatic trans...",122110.90


In [4]:
dataset.dtypes

Name            object
Engine          object
Year             int64
Mileage        float64
Zero_km           bool
Accessories     object
Value          float64
dtype: object

In [5]:
# Descriptive Statistics Overview

dataset[['Mileage', 'Value']].describe()

Unnamed: 0,Mileage,Value
count,197.0,258.0
mean,58278.42132,98960.513101
std,35836.733259,29811.932305
min,107.0,50742.1
25%,27505.0,70743.5125
50%,55083.0,97724.38
75%,90495.0,124633.3025
max,119945.0,149489.92


In [6]:
dataset.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 258 entries, 0 to 257
Data columns (total 7 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   Name         258 non-null    object 
 1   Engine       258 non-null    object 
 2   Year         258 non-null    int64  
 3   Mileage      197 non-null    float64
 4   Zero_km      258 non-null    bool   
 5   Accessories  258 non-null    object 
 6   Value        258 non-null    float64
dtypes: bool(1), float64(2), int64(1), object(3)
memory usage: 12.5+ KB


# Tuples

Tuples are **immutable** sequences that are used to store collections of items, usually **heterogeneous**. They can be built in several ways:
```
- Using a pair of parentheses: ( )
- Using a trailing comma: x,
- Using a pair of parentheses with comma-separated items: ( x, y, z )
- Using: tuple() or tuple(iterable)
```

In [None]:
()

In [None]:
1, 2, 3

In [None]:
name = 'Passat'
value = 153000
(name, value)

In [None]:
names_cars = ('Jetta Variant', 'Passat', 'Crossfox', 'DS5')
names_cars

In [None]:
type(names_cars)

## Tuple selections

In [None]:
names_cars = ('Jetta Variant', 'Passat', 'Crossfox', 'DS5')
names_cars

In [None]:
names_cars[0]

In [None]:
names_cars[1]

In [None]:
names_cars[-1]

In [None]:
names_cars[1:3]

In [None]:
names_cars = ('Jetta Variant', 'Passat', 'Crossfox', 'DS5', ('Fusca', 'Gol', 'C4'))
names_cars

In [None]:
names_cars[-1]

In [None]:
names_cars[-1][1]

## Iterating on tuples

In [None]:
names_cars = ('Jetta Variant', 'Passat', 'Crossfox', 'DS5')
names_cars

In [None]:
for item in names_cars:
  print(item)

## Tuple unpacking

In [None]:
names_cars = ('Jetta Variant', 'Passat', 'Crossfox', 'DS5')
names_cars

In [None]:
car_1, car_2, car_3, car_4 = names_cars

In [None]:
car_1

In [None]:
car_2

In [None]:
car_3

In [None]:
car_4

In [None]:
_, A, _, B = names_cars

In [None]:
A

In [None]:
B

In [None]:
_, C, *_ = names_cars   # ignore the rest after C

In [None]:
C

## *zip()*

https://docs.python.org/3.6/library/functions.html#zip

In [None]:
cars = ['Jetta Variant', 'Passat', 'Crossfox', 'DS5']
cars

In [None]:
values = [88078.64, 106161.94, 72832.16, 124549.07]
values

In [None]:
list(zip(cars, values))

In [None]:
for item in zip(cars, values):
  print(item)

In [None]:
for car, value in zip(cars, values):
  print(car, value)

In [None]:
# Print only the cars whose value is greater than 100000.
for car, value in zip(cars, values):
  if value > 100000:  # an `if` condition needs no surrounding parentheses
    print(car)

# Dictionaries

Lists are sequential collections, that is, the items in these sequences are ordered and use indices (integers) to access values.

Dictionaries are slightly different collections. They are data structures that represent a type of mapping. Mappings are collections of associations between pairs of values, where the first element of each pair is known as the *key* and the second as the *value*.

```
dictionary = {key_1: value_1, key_2: value_2, ..., key_n: value_n}
```

https://docs.python.org/3.6/library/stdtypes.html#typesmapping

In [None]:
cars = ['Jetta Variant', 'Passat', 'Crossfox']
cars

In [None]:
values = [88078.64, 106161.94, 72832.16]
values

In [None]:
cars.index('Passat')

In [None]:
values[cars.index('Passat')]

In [None]:
data = {'Jetta Variant': 88078.64, 'Passat': 106161.94, 'Crossfox': 72832.16}
data

In [None]:
type(data)

## Creating dictionaries with *zip()*

In [None]:
list(zip(cars, values))

In [None]:
data = dict(zip(cars, values))
data

## Operations with dictionaries

In [None]:
data = {'Jetta Variant': 88078.64, 'Passat': 106161.94, 'Crossfox': 72832.16}
data

### *dict[ key ]*

Returns the value corresponding to the *key* in the dictionary.

In [None]:
data['Passat']

### *key in dict*

Returns **True** if the *key* is found in the dictionary.

In [None]:
'Passat' in data

In [None]:
'Fusca' in data

In [None]:
'Fusca' not in data

### *len(dict)*

Returns the number of dictionary items.

In [None]:
len(data)

### *dict[ key ] = value*

Adds an item to the dictionary. If the *key* already exists, its value is overwritten.

In [None]:
data['DS5'] = 124549.07

In [None]:
data

### *del dict[ key ]*

Removes the item with the given *key* from the dictionary. A `KeyError` is raised if the *key* is not present.

In [None]:
data

In [None]:
del data['Passat']
data

## Dictionary methods

https://docs.python.org/3.6/tutorial/datastructures.html#dictionaries

https://docs.python.org/3.6/library/stdtypes.html#typesmapping

### *dict.update()*

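Updates the dictionary with the key-value pairs of another dictionary: values of existing keys are overwritten and new keys are added.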

In [None]:
data

In [None]:
data.update({'Passat': 106161.94})
data

In [None]:
data.update({'Passat': 106161.95, 'Fusca': 150000})
data

### *dict.copy()*

Creates a shallow copy of the dictionary. Adding or removing items in the copy does not affect the original.

In [None]:
dataCopy = data.copy()

In [None]:
dataCopy

In [None]:
del dataCopy['Fusca']
dataCopy

In [None]:
data

### *dict.pop(key[, default ])*

If the key is found in the dictionary, the item is removed and its value is returned. Otherwise, the value specified as *default* is returned. If *default* is not provided and the key is not found in the dictionary, a `KeyError` is raised.

In [None]:
dataCopy

In [None]:
dataCopy.pop('Passat')

In [None]:
dataCopy

In [None]:
# dataCopy.pop('Passat')

In [None]:
dataCopy.pop('Passat', 'Key not found')

In [None]:
dataCopy.pop('DS5', 'Key not found')

In [None]:
dataCopy

### *dict.clear()*

Removes all items from the dictionary.

In [None]:
dataCopy.clear()

In [None]:
dataCopy

## Iterating through dictionaries

In [None]:
data = {'Crossfox': 72832.16, 'DS5': 124549.07, 'Fusca': 150000, 'Jetta Variant': 88078.64, 'Passat': 106161.95}
data

### *dict.keys()*

Returns a view object (`dict_keys`) containing the *keys* of the dictionary. Use `list(dict.keys())` if an actual list is needed.

In [None]:
data.keys()

dict_keys(['Jetta Variant', 'Passat', 'Crossfox'])

In [None]:
for key in data.keys():
  print(data[key])

88078.64
106161.94
72832.16


### *dict.values()*

Returns a view object (`dict_values`) containing all the *values* in the dictionary. Use `list(dict.values())` if an actual list is needed.

In [None]:
data.values()

dict_values([88078.64, 106161.94, 72832.16])

### *dict.items()*

Returns a view object (`dict_items`) containing one tuple for each *key-value* pair in the dictionary.

In [None]:
data.items()

In [None]:
for item in data.items():
  print(item)

In [None]:
for key, value in data.items():
  print(key, value)


In [None]:
# Print only the keys whose value is greater than 100000.
for key, value in data.items():
  if value > 100000:  # an `if` condition needs no surrounding parentheses
    print(key)

# Built-in functions

The Python language has several built-in functions that are always accessible. 

Some of them we have already used in our training: *type()*, *print()*, *zip()*, *len()*, *set()*, etc.

https://docs.python.org/3.6/library/functions.html

In [None]:
data = {'Jetta Variant': 88078.64, 'Passat': 106161.94, 'Crossfox': 72832.16}
data

In [None]:
values = []
for value in data.values():
  values.append(value)
values

In [None]:
total = 0
for value in data.values():
  total += value
total

In [None]:
list(data.values())

In [None]:
sum(data.values())

In [None]:
help(print)

# Functions without and with parameters

### Functions without parameters

#### Standard format

```
def <name>():
    <instructions>
```

In [None]:
def avg():
  """Print the arithmetic mean of the fixed numbers 1, 2 and 3.

  Takes no parameters and returns nothing; the result is only printed.
  """
  numbers = (1, 2, 3)
  print(sum(numbers) / 3)

In [None]:
avg()

### Functions with parameters

#### Standard format

```
def <name>(<param_1>, <param_2>, ..., <param_n>):
    <instructions>
```

In [None]:
def avg(number_1, number_2, number_3):
  """Print the arithmetic mean of three numbers.

  Parameters:
    number_1, number_2, number_3: the numbers to average.

  Returns nothing; the result is only printed.
  """
  total = number_1 + number_2 + number_3
  print(total / 3)

In [None]:
avg(1, 2, 3)

In [None]:
avg(23, 45, 67)

In [None]:
def avg(listObj):
  """Print the arithmetic mean of the numbers in listObj.

  Parameters:
    listObj: a sized collection of numbers (e.g. a list).

  The mean is only printed, not returned, so the call evaluates to None.
  """
  total = sum(listObj)
  count = len(listObj)
  print(total / count)

In [None]:
result = avg([1, 2, 3, 4, 5, 6, 7, 8, 9])

In [None]:
result

In [None]:
type(result)

# Functions that return values


### Functions that return a value

#### Standard format

```
def <name>(<param_1>, <param_2>, ..., <param_n>):
     <instructions>
     return <result>
```

In [None]:
def avg(listObj):
  """Return the arithmetic mean of the numbers in listObj.

  Parameters:
    listObj: a sized collection of numbers (e.g. a list).

  Returns:
    The sum of the items divided by the number of items.
  """
  return sum(listObj) / len(listObj)

In [None]:
avg([1, 2, 3, 4, 5, 6, 7, 8])

In [None]:
result = avg([1, 2, 3, 4, 5, 6, 7, 8])

In [None]:
result

### Functions that return more than one value

#### Standard format

```
def <name>(<param_1>, <param_2>, ..., <param_n>):
     <instructions>
     return (<result_1>, <result_2>, ..., <result_n>)
```

In [None]:
def avg(listObj):
  """Return the arithmetic mean of listObj together with its item count.

  Parameters:
    listObj: a sized collection of numbers (e.g. a list).

  Returns:
    A tuple (mean, number_of_items).
  """
  total = sum(listObj)
  n_items = len(listObj)
  return total / n_items, n_items

In [None]:
avg([1, 2, 3, 4, 5, 6, 7, 8, 9])

In [None]:
result, n = avg([1, 2, 3, 4, 5, 6, 7, 8, 9])

In [None]:
result

In [None]:
n