In [None]:
# Load the libraries used throughout this notebook.
import numpy as np
import pandas as pd

# Report which pandas version is available:
print(f'pandas version {pd.__version__} installed.')

In [None]:
# pandas offers two kinds of data structures: Series and Dataframes.
# We start with Series, which are one-dimensional arrays whose entries carry labels.

# A Series of strings with the explicit labels 'a' to 'd':
data = ['circle', 'square', 'triangle', 'rectangle']
ser = pd.Series(data=data, index=list('abcd'))
print(ser, '\n')

# A Series built from a numpy array; no index is passed here, so the labels
# are the default integers 0, 1, 2, ...
data = np.array([2.3, 5.4, -3.4, 55, 9.2, 3])
ser = pd.Series(data=data)
print(ser)

In [None]:
# Indexing and slicing work as expected: this positional slice returns the
# entries at positions 2 and 3 (the stop position 4 is excluded).
ser.iloc[2:4]

In [None]:
# Dataframes have labelled columns, like a table in Excel; each column is a Series.
# One way to build a dataframe is from a dict that maps each column name to its values:
famous_scientists = {'first_name': ['Albert', 'Marie', 'Isaac', 'Charles', 'Erwin', 'Francis'],
                     'last_name': ['Einstein', 'Curie', 'Newton', 'Darwin', 'Schrödinger', 'Crick'],
                     'field': ['physics', 'physics and chemistry', 'physics',
                               'biology and geology', 'physics', 'biology'],
                     'year_of_birth': [1879, 1867, 1642, 1809, 1887, 1916],
                     'place_of_birth': ['Ulm, Germany', 'Warsaw, Poland', 'Woolsthorpe, England',
                                        'Shrewsbury, England', 'Vienna, Austria', 'Northampton, England']}
df = pd.DataFrame(famous_scientists)

# You can also add the rows as a list of lists (same resulting dataframe):
df2 = pd.DataFrame([['Albert', 'Einstein', 'physics', 1879, 'Ulm, Germany'],
                    ['Marie', 'Curie', 'physics and chemistry', 1867, 'Warsaw, Poland'],
                    ['Isaac', 'Newton', 'physics', 1642, 'Woolsthorpe, England'],
                    ['Charles', 'Darwin', 'biology and geology', 1809, 'Shrewsbury, England'],
                    ['Erwin', 'Schrödinger', 'physics', 1887, 'Vienna, Austria'],
                    ['Francis', 'Crick', 'biology', 1916, 'Northampton, England']],
                   columns=['first_name', 'last_name', 'field', 'year_of_birth', 'place_of_birth'])

# Element-wise comparison. Jupyter only displays the value of the LAST expression
# of a cell, so the comparison is printed explicitly; otherwise it would be discarded.
print(df == df2)

df  # compare with print(df)  - Jupyter uses pretty formatting

In [None]:
# Access individual values inside the dataframe.
# iloc works with index positions (zero-based indexing), loc with labels.
print(
    df.iloc[0, 1],            # row position 0, column position 1
    df.loc[0, 'last_name'],   # row label 0, column label 'last_name'
    df.iloc[2, 3],            # row position 2, column position 3
    df.loc[3, 'field'],       # row label 3, column label 'field'
    sep='\n',                 # one value per output line
)

In [None]:
# Select a row and a column, printed one after the other.
print(
    df.iloc[0],           # select a row (the one at position 0)
    '\n',                 # together with sep, leaves blank lines between the two outputs
    df.loc[:, 'field'],   # select a column; does the same as: df['field'] and df.field
    sep='\n',
)

In [None]:
# Assigning to a column name that does not exist yet appends that column to `df`.
# A list supplies one value per row:
df['new_column'] = list(range(1, 7))

# A single value is used for all rows of the column (this replaces the values set above):
df['new_column'] = 'hi there'

# The values can also be computed from another column:
df['new_column'] = df['year_of_birth'] + 1000

df

In [None]:
# Delete the newly added column again.
# `columns='new_column'` is equivalent to `('new_column', axis=1)`:
#   axis=1 -> for a given column along all rows
#   axis=0 -> for a given row along all columns
# drop() returns a new dataframe, which is assigned back to `df` (instead of inplace=True).
# errors='ignore' keeps the cell from raising a KeyError when it is run a second time
# and the column is already gone.
df = df.drop(columns='new_column', errors='ignore')
df

In [None]:
# Add a new row at the end of the dataframe: assigning to a row label that does not
# exist yet (here len(df)) enlarges the dataframe by one row.
# The year is passed as an integer like the other `year_of_birth` entries; the string
# '1844' would leave the column with mixed integer and string values.
# Note: every additional run of this cell appends the row once more.
df.loc[len(df)] = ['Friedrich', 'Miescher', 'biology', 1844, 'Basel, Switzerland']
df

In [None]:
# Save the dataframe as a csv file.
# By default the row index is written too, as the first (unnamed) column.
df.to_csv(path_or_buf='scientists.csv')

In [None]:
# Load a dataset from a csv file; from here on `df` refers to this dataset.
# (`sep` names the column separator; the comma is spelled out explicitly.)
df = pd.read_csv(filepath_or_buffer='game_of_thrones.csv', sep=',')

In [None]:
# What's in it? Look at the first ten rows
# (df.tail() shows rows from the end of the dataframe instead):
df.head(n=10)

In [None]:
# Reading from and writing to csv and Excel files, using some of the available options.

# csv: header=None -> the file has no header row; nrows=5 -> read only five rows
new_df = pd.read_csv(filepath_or_buffer='file.csv', nrows=5, header=None)
new_df.to_csv(path_or_buf='my_file.csv')

# Excel: sheet_name is the name of the worksheet the data is written to
new_df = pd.read_excel(io='file.xlsx')
new_df.to_excel(excel_writer='my_file.xlsx', sheet_name='Sheet1')