In [1]:
import pandas as pd

## Creating a data frame

### Data

In [2]:
# Data lists: written as one (name, age, city, test score) tuple per person,
# then transposed with zip() into one list per column
names, ages, cities, test_scores = map(list, zip(
    ('Alexander', 29, 'Moscow', 20),
    ('Robert', 19, 'New York', 72),
    ('Cynthia', 21, 'Paris', 13),
    ('Jessica', 45, 'Berlin', 43),
    ('Alan', 34, 'Tokyo', 38),
    ('Danny', 34, 'Berlin', 34),
    ('Maria', 51, 'Madrid', 8),
    ('Karim', 27, 'Tomsk', 89),
))

### Data dictionary

In [3]:
# data dictionary: each key is a column name, each value is the list holding that column's data
test_data = {
    'Name': names,
    'City': cities,
    'Age': ages,
    'Test score': test_scores,
}

In [4]:
# indices (names of rows): 'id 001' ... 'id 008', a zero-padded 3-digit number per row
# NOTE: the variable name `id` hides Python's built-in id() for the rest of the session
id = ['id {:03d}'.format(number) for number in range(1, 9)]

In [5]:
# build the data frame: columns come from the data dictionary,
# row labels come from the list of user-defined indices
df = pd.DataFrame(data=test_data, index=id)

In [6]:
# show the whole data frame (a bare expression on the last line of a cell is rendered as its output)
df

Unnamed: 0,Name,City,Age,Test score
id 001,Alexander,Moscow,29,20
id 002,Robert,New York,19,72
id 003,Cynthia,Paris,21,13
id 004,Jessica,Berlin,45,43
id 005,Alan,Tokyo,34,38
id 006,Danny,Berlin,34,34
id 007,Maria,Madrid,51,8
id 008,Karim,Tomsk,27,89


In [7]:
# check the type of the object returned by pd.DataFrame
type(df)

pandas.core.frame.DataFrame

In [8]:
# row labels of the data frame (the values passed as index= when it was created)
df.index

Index(['id 001', 'id 002', 'id 003', 'id 004', 'id 005', 'id 006', 'id 007',
       'id 008'],
      dtype='object')

In [9]:
# column labels of the data frame (the keys of the data dictionary)
df.columns

Index(['Name', 'City', 'Age', 'Test score'], dtype='object')

In [10]:
# the column labels are stored in a pandas Index; a single entry is a plain str
# (position 2 is the third column, 'Age')
type(df.columns), type(df.columns[2]), df.columns[2]

(pandas.core.indexes.base.Index, str, 'Age')

In [11]:
# print the column names on one line, each followed by ' | ' (no newline at the end)
print("| ", end='')
for col in df.columns:
    print(col, end=' | ')

| Name | City | Age | Test score | 

In [12]:
# first rows of the data frame; head() without an argument returns 5 rows
df.head()

Unnamed: 0,Name,City,Age,Test score
id 001,Alexander,Moscow,29,20
id 002,Robert,New York,19,72
id 003,Cynthia,Paris,21,13
id 004,Jessica,Berlin,45,43
id 005,Alan,Tokyo,34,38


In [13]:
# last rows of the data frame; tail() without an argument returns 5 rows
df.tail()

Unnamed: 0,Name,City,Age,Test score
id 004,Jessica,Berlin,45,43
id 005,Alan,Tokyo,34,38
id 006,Danny,Berlin,34,34
id 007,Maria,Madrid,51,8
id 008,Karim,Tomsk,27,89


In [14]:
# first 3 rows: the argument of head() is the number of rows to return
df.head(3)

Unnamed: 0,Name,City,Age,Test score
id 001,Alexander,Moscow,29,20
id 002,Robert,New York,19,72
id 003,Cynthia,Paris,21,13


In [15]:
# last 2 rows: the argument of tail() is the number of rows to return
df.tail(2)

Unnamed: 0,Name,City,Age,Test score
id 007,Maria,Madrid,51,8
id 008,Karim,Tomsk,27,89


## Working With Rows and Columns in DataFrames

In [16]:
# Getting one column by its name with [] indexing; the result keeps the row labels
df['City']

id 001      Moscow
id 002    New York
id 003       Paris
id 004      Berlin
id 005       Tokyo
id 006      Berlin
id 007      Madrid
id 008       Tomsk
Name: City, dtype: object

In [17]:
# Getting one column as an attribute of the data frame.
# This form is only available when the column name is a valid Python identifier:
# 'Age' qualifies, while a name containing a space ('Test score') needs df['Test score'].
df.Age

id 001    29
id 002    19
id 003    21
id 004    45
id 005    34
id 006    34
id 007    51
id 008    27
Name: Age, dtype: int64

In [18]:
# Checking the type of a single column: it is a pandas Series
type(df['City'])

pandas.core.series.Series

In [19]:
# getting one value from the 'City' Series by its user-defined row label
df['City'].loc['id 005']

'Tokyo'

In [20]:
# getting one value from the 'City' Series by its 0-based position with .iloc
# (position 0 is the first row, 'id 001')
df['City'].iloc[0]

'Moscow'

In [21]:
# row labels of every row whose 'City' equals 'Berlin', as a plain Python list:
# the element-wise comparison gives a boolean mask that is used to filter df.index
df.index[df['City'].eq('Berlin')].tolist()

['id 004', 'id 006']

In [22]:
# the 'Test score' column; its name contains a space, so it is selected with [] indexing
df['Test score']

id 001    20
id 002    72
id 003    13
id 004    43
id 005    38
id 006    34
id 007     8
id 008    89
Name: Test score, dtype: int64

In [23]:
# getting one row of the data frame by its user-defined label with .loc;
# the row comes back with the column names as its index
df.loc['id 002']

Name            Robert
City          New York
Age                 19
Test score          72
Name: id 002, dtype: object

In [24]:
# a single row selected with .loc is a pandas Series
type(df.loc['id 002'])

pandas.core.series.Series

In [25]:
# read a single value directly by (row label, column label).
# Attribute access on the row Series is avoided on purpose: .Name (the 'Name' column)
# differs only by case from .name, which holds the row label 'id 002',
# and attribute access cannot reach a column such as 'Test score' at all.
df.loc['id 002', 'Name']

'Robert'

In [26]:
# getting one row of the data frame by its 0-based position with .iloc
# (position 1 is the second row, 'id 002')
df.iloc[1]

Name            Robert
City          New York
Age                 19
Test score          72
Name: id 002, dtype: object

In [27]:
# getting several rows at once by passing a list of user-defined labels to .loc
df.loc[['id 002', 'id 004']]

Unnamed: 0,Name,City,Age,Test score
id 002,Robert,New York,19,72
id 004,Jessica,Berlin,45,43


In [28]:
# a list of row labels passed to .loc gives a DataFrame (a single label gave a Series)
type(df.loc[['id 002', 'id 004']]) 

pandas.core.frame.DataFrame

In [29]:
# getting several columns at once by passing a list of column names to []
df[['Name', 'Test score']]

Unnamed: 0,Name,Test score
id 001,Alexander,20
id 002,Robert,72
id 003,Cynthia,13
id 004,Jessica,43
id 005,Alan,38
id 006,Danny,34
id 007,Maria,8
id 008,Karim,89


In [30]:
# a list of column names passed to [] gives a DataFrame (a single name gave a Series)
type(df[['Name', 'Test score']]) 

pandas.core.frame.DataFrame