# Data Structures

In [1]:
import pandas as pd

In [2]:
# A Series built from a plain list; no index is passed, so pandas supplies
# a default integer index starting at 0.
revenues = pd.Series([5555, 7000, 1980])

In [3]:
revenues

0    5555
1    7000
2    1980
dtype: int64

In [4]:
revenues.index

RangeIndex(start=0, stop=3, step=1)

In [5]:
revenues.values

array([5555, 7000, 1980], dtype=int64)

In [6]:
revenues[0]

5555

In [7]:
type(revenues.values)

numpy.ndarray

In [8]:
# Revenue per city: the explicit index attaches a string label to each value,
# in the same order as the data list.
city_revenues = pd.Series(
    data=[4200, 8000, 6500],
    index=["Amsterdam", "Toronto", "Tokyo"],
)

In [9]:
city_revenues

Amsterdam    4200
Toronto      8000
Tokyo        6500
dtype: int64

In [10]:
city_revenues["Amsterdam"]

4200

In [11]:
# Building a Series from a dict: the keys become the index labels and the
# values become the data.
city_employee_count = pd.Series(data={"Amsterdam": 5, "Tokyo": 8})

In [12]:
city_employee_count

Amsterdam    5
Tokyo        8
dtype: int64

In [13]:
city_employee_count["Tokyo"]

8

In [14]:
city_employee_count.keys()

Index(['Amsterdam', 'Tokyo'], dtype='object')

In [15]:
"Tokyo" in city_employee_count

True

In [16]:
"New York" in city_employee_count

False

In [17]:
# `in` on a Series tests the index labels; the labels here are city names,
# so the integer 0 is not found even though position 0 exists.
0 in city_employee_count

False

In [18]:
# Combine the two Series into one table, one column per Series. Rows are
# matched on index label, so a city present in only one Series (Toronto has
# no employee count) gets a missing value in the other column.
city_data = pd.DataFrame(
    dict(employee_count=city_employee_count, city_revenues=city_revenues)
)
city_data

Unnamed: 0,employee_count,city_revenues
Amsterdam,5.0,4200
Tokyo,8.0,6500
Toronto,,8000


In [19]:
city_data.index

Index(['Amsterdam', 'Tokyo', 'Toronto'], dtype='object')

In [20]:
city_data.values

array([[5.0e+00, 4.2e+03],
       [8.0e+00, 6.5e+03],
       [    nan, 8.0e+03]])

In [21]:
city_data.axes

[Index(['Amsterdam', 'Tokyo', 'Toronto'], dtype='object'),
 Index(['employee_count', 'city_revenues'], dtype='object')]

In [22]:
city_data.axes[0]

Index(['Amsterdam', 'Tokyo', 'Toronto'], dtype='object')

In [23]:
city_data.axes[1]

Index(['employee_count', 'city_revenues'], dtype='object')

In [24]:
city_data.keys()

Index(['employee_count', 'city_revenues'], dtype='object')

In [25]:
# `in` on a DataFrame tests the column labels, not the row labels, so a city
# name (a row label here) is not found.
"Amsterdam" in city_data

False

In [26]:
"revenue" in city_data

False

In [27]:
"city_revenues" in city_data

True

In [28]:
city_revenues

Amsterdam    4200
Toronto      8000
Tokyo        6500
dtype: int64

In [29]:
city_revenues["Toronto"]

8000

In [30]:
# Positional lookup (second element). Use .iloc explicitly: plain [] with an
# integer key on a string-labelled Series depends on a positional fallback
# that pandas has deprecated in favour of .iloc.
city_revenues.iloc[1]

8000

In [31]:
# Positional lookup of the last element. Use .iloc explicitly: plain [] with
# an integer key on a string-labelled Series depends on a positional fallback
# that pandas has deprecated in favour of .iloc.
city_revenues.iloc[-1]

6500

In [32]:
# An integer slice with [] is positional here: everything from position 1 on.
city_revenues[1:]

Toronto    8000
Tokyo      6500
dtype: int64

In [33]:
city_revenues["Toronto":]

Toronto    8000
Tokyo      6500
dtype: int64

In [34]:
# Integer labels that deliberately differ from the positions, used to contrast
# label-based access (.loc) with position-based access (.iloc).
color = pd.Series(
    data=["red", "purple", "blue", "green", "yellow"],
    index=[1, 2, 3, 5, 8],
)
color

1       red
2    purple
3      blue
5     green
8    yellow
dtype: object

In [35]:
color.loc[1]

'red'

In [36]:
color.iloc[1]

'purple'

In [37]:
# .iloc slices by position and excludes the stop position: positions 0, 1, 2.
color.iloc[0:3]

1       red
2    purple
3      blue
dtype: object

In [38]:
# .loc slices by label and includes the stop label: labels 3 through 8.
color.loc[3:8]

3      blue
5     green
8    yellow
dtype: object

In [39]:
color.loc[1:3]

1       red
2    purple
3      blue
dtype: object

In [40]:
color.iloc[-2]

'green'

In [41]:
city_data["city_revenues"]

Amsterdam    4200
Tokyo        6500
Toronto      8000
Name: city_revenues, dtype: int64

In [42]:
type(city_data["city_revenues"])

pandas.core.series.Series

In [43]:
city_data.city_revenues

Amsterdam    4200
Tokyo        6500
Toronto      8000
Name: city_revenues, dtype: int64

In [44]:
# A DataFrame built from a list of records (one dict per row). The column
# named "shape" shares its name with the DataFrame.shape attribute, so it is
# read with bracket notation.
toys = pd.DataFrame(
    data=[
        dict(name="ball", shape="sphere"),
        dict(name="Rubiks cube", shape="cube"),
    ]
)
toys["shape"]

0    sphere
1      cube
Name: shape, dtype: object

In [45]:
toys.shape

(2, 2)

In [46]:
city_data.loc["Amsterdam"]

employee_count       5.0
city_revenues     4200.0
Name: Amsterdam, dtype: float64

In [47]:
city_data.loc["Tokyo": "Toronto"]

Unnamed: 0,employee_count,city_revenues
Tokyo,8.0,6500
Toronto,,8000


In [48]:
city_data.iloc[1]

employee_count       8.0
city_revenues     6500.0
Name: Tokyo, dtype: float64

In [49]:
# Row label slice (both endpoints included) plus a single column label:
# the result is a Series of that column for the selected rows.
city_data.loc["Amsterdam": "Tokyo", "city_revenues"]

Amsterdam    4200
Tokyo        6500
Name: city_revenues, dtype: int64

In [50]:
city_revenues.sum()

18700

In [51]:
city_revenues.max()

8000

In [52]:
city_revenues.min()

4200

In [53]:
city_revenues.mean()

6233.333333333333

In [58]:
# Two more cities, using the same column names as city_data (listed in a
# different order) so the frames can be stacked later.
further_city_data = pd.DataFrame(
    data=dict(city_revenues=[7000, 3400], employee_count=[2, 2]),
    index=["New York", "Barcelona"],
)

In [59]:
further_city_data

Unnamed: 0,city_revenues,employee_count
New York,7000,2
Barcelona,3400,2


In [64]:
# Stack the two frames row-wise. Columns are matched by name rather than by
# position, so the differing column order of further_city_data does not matter.
all_city_data = pd.concat(objs=[city_data, further_city_data], axis=0, sort=False)

In [65]:
all_city_data

Unnamed: 0,employee_count,city_revenues
Amsterdam,5.0,4200
Tokyo,8.0,6500
Toronto,,8000
New York,2.0,7000
Barcelona,2.0,3400


In [68]:
# Per-city country name plus a 0/1 `capital` column. The index overlaps only
# partly with all_city_data: Rotterdam is new and New York is absent.
city_countries = pd.DataFrame(
    data=dict(
        country=["Holland", "Japan", "Holland", "Canada", "Spain"],
        capital=[1, 1, 0, 0, 0],
    ),
    index=["Amsterdam", "Tokyo", "Rotterdam", "Toronto", "Barcelona"],
)

In [71]:
# Put the two frames side by side (axis=1), aligning rows on their index labels.
# NOTE(review): the saved output below repeats city labels with the column
# groups unaligned, which looks like the result of a row-wise concat; re-run
# this cell and the display cell to refresh it.
cities = pd.concat(objs=[all_city_data, city_countries], axis=1, sort=False)

In [72]:
cities

Unnamed: 0,employee_count,city_revenues,country,capital
Amsterdam,5.0,4200.0,,
Tokyo,8.0,6500.0,,
Toronto,,8000.0,,
New York,2.0,7000.0,,
Barcelona,2.0,3400.0,,
Amsterdam,,,Holland,1.0
Tokyo,,,Japan,1.0
Rotterdam,,,Holland,0.0
Toronto,,,Canada,0.0
Barcelona,,,Spain,0.0
