# DATAFRAME

In [1]:
import pandas as pd
import numpy as np

## DataFrame creation

### Using a dictionary of Series

In [29]:
# Row labels shared by every product Series.
indices1 = ['price', 'count', 'description']

# Build one Series per product; the dict keys become the DataFrame columns
# and the shared index becomes its rows.
prods = {
    product: pd.Series(values, index=indices1)
    for product, values in [
        ('iPhone', [699.9, 425, 'mobile']),
        ('iPad', [799.5, 169, 'tablet']),
        ('MacBook', [2309.2, 77, 'laptop']),
    ]
}

d1 = pd.DataFrame(prods)
d1

Unnamed: 0,MacBook,iPad,iPhone
price,2309.2,799.5,699.9
count,77,169,425
description,laptop,tablet,mobile


### Using a dictionary of ndarrays/lists

In [13]:
# Column-oriented input: each key names a column and each list holds that
# column's values, one entry per row.
items = dict(
    author=['Mr 1', 'Mr 2', 'Mr 3'],
    rating=[4.2, 4.5, 3.8],
    reviews=[234, 78, 119],
)

# The explicit index supplies the row labels.
d2 = pd.DataFrame(data=items, index=['book1', 'book2', 'book3'])
d2

Unnamed: 0,author,rating,reviews
book1,Mr 1,4.2,234
book2,Mr 2,4.5,78
book3,Mr 3,3.8,119


### Using a structured array

In [18]:
# Structured array with one record per student: a fixed-width byte-string
# name, a 32-bit integer age and a 32-bit float grade.
# 'S15' is used instead of the legacy 'a15' type code, which NumPy 2.0
# deprecated; both describe a 15-byte string, so names are still stored as
# bytes (shown as b'Henry'). Use 'U15' instead if str values are wanted.
students = np.zeros(4, dtype=[('name', 'S15'), ('age', 'i4'), ('grade', 'f4')])
students[:] = [
    ('Henry', 12, 9.28),
    ('Peter', 14, 8.76),
    ('Susan', 15, 8.88),
    ('John', 11, 6.54)
]

# Field names of the structured dtype become the DataFrame columns.
d3 = pd.DataFrame(students, index=['s1', 's2', 's3', 's4'])
d3

Unnamed: 0,name,age,grade
s1,b'Henry',12,9.28
s2,b'Peter',14,8.76
s3,b'Susan',15,8.88
s4,b'John',11,6.54


### [DataFrame.from_dict](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.from_dict.html)

In [23]:
# Nested mapping: outer keys label the houses, inner keys name the attributes.
houses = {
    label: {'area': area, 'price': price}
    for label, area, price in [
        ('house 1', 45, 260),
        ('house 2', 75, 502),
        ('house 3', 112, 1104),
    ]
}

# orient='index' turns the outer keys into row labels and the inner keys
# into columns.
d4 = pd.DataFrame.from_dict(houses, orient='index')
d4

Unnamed: 0,area,price
house 1,45,260
house 2,75,502
house 3,112,1104


### [DataFrame.from_records](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.from_records.html)

In [26]:
# One (code, manufacturer) record per TV, built by pairing the two lists.
tvs = list(zip(
    ['A2345', 'B9345', 'D4325'],
    ['Samsung', 'Sony', 'LG'],
))

# Each tuple becomes a row; `columns` names the tuple positions and `index`
# labels the rows.
d5 = pd.DataFrame.from_records(
    tvs,
    index=['tv1', 'tv2', 'tv3'],
    columns=['code', 'manufacturer'],
)
d5

Unnamed: 0,code,manufacturer
tv1,A2345,Samsung
tv2,B9345,Sony
tv3,D4325,LG


### [DataFrame.from_items](https://pandas.pydata.org/pandas-docs/version/0.23/generated/pandas.DataFrame.from_items.html) (deprecated in pandas 0.23, removed in 1.0; use `DataFrame.from_dict` instead)

In [37]:
# (country, [gold, silver, bronze]) pairs, one per row of the result.
countries = [
    ('USA', [144, 65, 456]),
    ('Russia', [134, 342, 222]),
    ('China', [99, 255, 211])
]

# DataFrame.from_items was deprecated in pandas 0.23 and removed in 1.0.
# The equivalent call is from_dict on the pairs converted to a dict:
# orient='index' makes each key a row label and `columns` names the list
# positions. dict() keeps the pairs in their original order (Python 3.7+).
d6 = pd.DataFrame.from_dict(
    dict(countries),
    orient='index',
    columns=['Gold', 'Silver', 'Bronze'],
)
d6

Unnamed: 0,Gold,Silver,Bronze
USA,144,65,456
Russia,134,342,222
China,99,255,211


In [38]:
# Small one-dimensional integer array for the sum() demonstration.
a = np.array((1, 3, 5))
a

array([1, 3, 5])

In [39]:
# Without an axis argument the sum runs over every element of the array.
np.sum(a)

9

In [40]:
# Rebind `a` to a 3x3 matrix, written one row per line.
a = np.array([
    [1, 2, 4],
    [2, 4, 8],
    [4, 8, 16],
])

In [41]:
# With no axis given, every element of the 2-D array is added into one total.
np.sum(a)

49

In [42]:
# `axis` has to be an integer (or a tuple of integers); a label such as
# 'column' is rejected with a TypeError. The error is caught and printed so
# the demonstration stays visible without stopping a "Restart & Run All".
try:
    a.sum(axis='column')
except TypeError as err:
    print('TypeError:', err)

TypeError: an integer is required

In [43]:
# axis=1 collapses the columns, giving one total per row.
np.sum(a, axis=1)

array([ 7, 14, 28])