# DATAFRAME

In [1]:
import pandas as pd
import numpy as np
from datetime import datetime

## DataFrame creation

### Using a dictionary of Series

In [29]:
# Every product is described by the same three labelled attributes; the
# Series index becomes the row index of the resulting DataFrame and each
# dictionary key becomes a column.
indices1 = ['price', 'count', 'description']
prods = {
    product: pd.Series(attributes, index=indices1)
    for product, attributes in [
        ('iPhone', [699.9, 425, 'mobile']),
        ('iPad', [799.5, 169, 'tablet']),
        ('MacBook', [2309.2, 77, 'laptop']),
    ]
}
d1 = pd.DataFrame(prods)
d1

Unnamed: 0,MacBook,iPad,iPhone
price,2309.2,799.5,699.9
count,77,169,425
description,laptop,tablet,mobile


### Using a dictionary of ndarrays/lists

In [13]:
# Each key is a column name and each list holds that column's values;
# the explicit index supplies one row label per list position.
items = dict(
    author=['Mr 1', 'Mr 2', 'Mr 3'],
    rating=[4.2, 4.5, 3.8],
    reviews=[234, 78, 119],
)
book_labels = ['book1', 'book2', 'book3']
d2 = pd.DataFrame(data=items, index=book_labels)
d2

Unnamed: 0,author,rating,reviews
book1,Mr 1,4.2,234
book2,Mr 2,4.5,78
book3,Mr 3,3.8,119


### Using a structured array

In [18]:
# Structured dtype: one named, typed field per DataFrame column.
# 'S15' is a fixed-width byte string of up to 15 bytes. It replaces the
# legacy 'a15' spelling, an alias that NumPy 2.0 removed. The stored values
# are still bytes (displayed as b'...'); use 'U15' instead if str values
# are wanted.
student_dtype = [('name', 'S15'), ('age', 'i4'), ('grade', 'f4')]
students = np.zeros(4, dtype=student_dtype)
# Fill all four records at once; each tuple maps positionally onto the fields.
students[:] = [
    ('Henry', 12, 9.28),
    ('Peter', 14, 8.76),
    ('Susan', 15, 8.88),
    ('John', 11, 6.54)
]
# Field names become column names; the index labels the four records.
d3 = pd.DataFrame(students, index=['s1', 's2', 's3', 's4'])
d3

Unnamed: 0,name,age,grade
s1,b'Henry',12,9.28
s2,b'Peter',14,8.76
s3,b'Susan',15,8.88
s4,b'John',11,6.54


### [DataFrame.from_dict](http://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.from_dict.html)

In [23]:
# Nested mapping: outer keys are record labels, inner keys are field names.
# With orient='index' the outer keys become the row index and the inner
# keys become the columns.
houses = dict([
    ('house 1', dict(area=45, price=260)),
    ('house 2', dict(area=75, price=502)),
    ('house 3', dict(area=112, price=1104)),
])
d4 = pd.DataFrame.from_dict(data=houses, orient='index')
d4

Unnamed: 0,area,price
house 1,45,260
house 2,75,502
house 3,112,1104


### [DataFrame.from_records](http://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.from_records.html)

In [26]:
# Each tuple is one record (row); column names and row labels are supplied
# separately because plain tuples carry no field names.
tvs = [
    ('A2345', 'Samsung'),
    ('B9345', 'Sony'),
    ('D4325', 'LG'),
]
tv_columns = ['code', 'manufacturer']
tv_labels = ['tv1', 'tv2', 'tv3']
d5 = pd.DataFrame.from_records(tvs, index=tv_labels, columns=tv_columns)
d5

Unnamed: 0,code,manufacturer
tv1,A2345,Samsung
tv2,B9345,Sony
tv3,D4325,LG


### [DataFrame.from_items](http://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.from_items.html) (deprecated in pandas 0.23 and removed in 1.0; use `DataFrame.from_dict` instead)

In [37]:
# Sequence of (row label, row values) pairs.
countries = [
    ('USA', [144, 65, 456]),
    ('Russia', [134, 342, 222]),
    ('China', [99, 255, 211])
]
# DataFrame.from_items was deprecated in pandas 0.23 and removed in 1.0.
# The supported equivalent is from_dict on an insertion-ordered dict:
# with orient='index' the keys become row labels and `columns` names the
# positions inside each value list.
d6 = pd.DataFrame.from_dict(
    dict(countries),
    orient='index',
    columns=['Gold', 'Silver', 'Bronze'],
)
d6

Unnamed: 0,Gold,Silver,Bronze
USA,144,65,456
Russia,134,342,222
China,99,255,211


### [pandas.read_csv](http://pandas.pydata.org/pandas-docs/stable/generated/pandas.read_csv.html#pandas.read_csv)

In [58]:
# Read the comma-separated file from the working directory; the path is
# relative, so the notebook must be run next to products.csv.
products_path = 'products.csv'
pd.read_csv(products_path)

Unnamed: 0,name,code,price,quantity
0,Dell Inspiron,L8789,1589,45
1,MacBook,N0494,2744,35
2,Sony Vaio,B0043,1998,24


### [pandas.read_table](http://pandas.pydata.org/pandas-docs/stable/generated/pandas.read_table.html)

In [61]:
# Read the delimited text file from the working directory; the path is
# relative, so the notebook must be run next to customers.dat.
customers_path = 'customers.dat'
pd.read_table(customers_path)

Unnamed: 0,name,nationality,company
0,Henry,USA,Samsung
1,Fung,China,Baidu
2,Robert,France,Roh


## Operations

In [6]:
# Base frame reused by the operation examples below: one tuple per film,
# with column names supplied separately and a default integer index.
films = [
    ('film 1', 4.8, 783),
    ('film 2', 2.7, 45),
    ('film 3', 3.9, 287),
]
film_columns = ['name', 'rating', 'review']
dfOp1 = pd.DataFrame(data=films, columns=film_columns)
dfOp1

Unnamed: 0,name,rating,review
0,film 1,4.8,783
1,film 2,2.7,45
2,film 3,3.9,287


### Selection

In [7]:
# Indexing with a single column label returns that column as a Series
# whose name is the column label.
dfOp1['rating']

0    4.8
1    2.7
2    3.9
Name: rating, dtype: float64

### Assignment and Insertion

In [13]:
# Work on a copy so the shared example frame dfOp1 stays unchanged.
dfOpAs1 = dfOp1.copy()
# Assigning a scalar to a new column label appends the column and repeats
# the value in every row. The timestamp is taken at execution time, so the
# displayed values differ from run to run.
published_at = datetime.now()
dfOpAs1['publishedAt'] = published_at
dfOpAs1

Unnamed: 0,name,rating,review,publishedAt
0,film 1,4.8,783,2017-03-10 16:26:19.969429
1,film 2,2.7,45,2017-03-10 16:26:19.969429
2,film 3,3.9,287,2017-03-10 16:26:19.969429


In [14]:
# Work on a copy so the shared example frame dfOp1 stays unchanged.
dfOpIn1 = dfOp1.copy()
# insert() modifies the frame in place, placing the new column at the given
# position (here: second) and repeating the scalar in every row.
dfOpIn1.insert(loc=1, column='discount', value=0.2)
dfOpIn1

Unnamed: 0,name,discount,rating,review
0,film 1,0.2,4.8,783
1,film 2,0.2,2.7,45
2,film 3,0.2,3.9,287


### Deletion

In [10]:
# Work on a copy so the shared example frame dfOp1 stays unchanged.
dfOpDe1 = dfOp1.copy()
# `del` removes the column from the frame in place.
del dfOpDe1['review']
dfOpDe1

Unnamed: 0,name,rating
0,film 1,4.8
1,film 2,2.7
2,film 3,3.9


In [11]:
# Work on a copy so the shared example frame dfOp1 stays unchanged.
dfOpDe2 = dfOp1.copy()
# pop() removes the column from the frame and hands it back as a Series.
names = dfOpDe2.pop(item='name')
names

0    film 1
1    film 2
2    film 3
Name: name, dtype: object

In [16]:
# Display the frame after pop(): the 'name' column is no longer part of it.
dfOpDe2

Unnamed: 0,rating,review
0,4.8,783
1,2.7,45
2,3.9,287


### Mathematical Operations

In [19]:
# Sequence of (country, [gold, silver, bronze]) pairs.
medals = [
    ('USA', [144, 65, 456]),
    ('Russia', [134, 342, 222]),
    ('China', [99, 255, 211])
]
# DataFrame.from_items was deprecated in pandas 0.23 and removed in 1.0.
# The supported equivalent is from_dict on an insertion-ordered dict:
# with orient='index' the keys become row labels and `columns` names the
# positions inside each value list.
dfOpMa = pd.DataFrame.from_dict(
    dict(medals),
    orient='index',
    columns=['Gold', 'Silver', 'Bronze'],
)
dfOpMa

Unnamed: 0,Gold,Silver,Bronze
USA,144,65,456
Russia,134,342,222
China,99,255,211


In [20]:
# Arithmetic with a scalar is applied to every element; the result is a
# new frame with the same row and column labels.
dfOpMa * 2

Unnamed: 0,Gold,Silver,Bronze
USA,288,130,912
Russia,268,684,444
China,198,510,422


In [21]:
# A NumPy ufunc applied to a DataFrame works element-wise and returns a
# DataFrame with the same labels (integer inputs come back as floats).
np.sqrt(dfOpMa)

Unnamed: 0,Gold,Silver,Bronze
USA,12.0,8.062258,21.354157
Russia,11.575837,18.493242,14.899664
China,9.949874,15.968719,14.525839
