In [31]:
import pandas as pd

# Series

A Series is a one-dimensional labelled array; every column of a DataFrame is a Series.

## Series with normal index

In [32]:
# Build a Series by pairing each label with its value: 'a' -> 10, ..., 'e' -> 50.
index = ['a', 'b', 'c', 'd', 'e']
values = [10, 20, 30, 40, 50]
s = pd.Series(data=values, index=index)
s

a    10
b    20
c    30
d    40
e    50
dtype: int64

In [33]:
# Label 'a' is unique in this index, so .loc hands back a single scalar value.
s.loc['a']

np.int64(10)

## Series with duplicate index

In [34]:
# Same values as before, but the label 'a' is deliberately used twice
# (first and last position) to show that an index may contain duplicates.
index_with_duplicates = [
    'a', 'b', 'c', 'd',
    'a',  # repeated label
]
s = pd.Series(data=values, index=index_with_duplicates)
s

a    10
b    20
c    30
d    40
a    50
dtype: int64

In [35]:
# Label 'a' now occurs twice, so .loc returns a Series with both matching entries
# instead of a single scalar.
s.loc['a']

a    10
a    50
dtype: int64

# DataFrames

Every key in the dict becomes a column, and the list stored under that key supplies the column's values.

## DataFrame from dict

In [36]:
# Column-oriented construction: each keyword names a column and its list
# provides one value per row. No index is given, so rows are labelled 0, 1, 2.
df = pd.DataFrame(dict(
    name=['Mike', 'Bob', 'Alice'],
    age=[30, 80, 45],
    job=['Programmer', 'Clerk', 'Designer'],
))
df

Unnamed: 0,name,age,job
0,Mike,30,Programmer
1,Bob,80,Clerk
2,Alice,45,Designer


## set index of DataFrame to an existing column

In [37]:
# Promote the 'name' column to the row index; set_index returns a new frame,
# which is bound back to df.
# NOTE: not idempotent - once this cell has run, 'name' is the index rather
# than a column, so running the cell a second time fails.
df = df.set_index('name')
df

Unnamed: 0_level_0,age,job
name,Unnamed: 1_level_1,Unnamed: 2_level_1
Mike,30,Programmer
Bob,80,Clerk
Alice,45,Designer


In [38]:
# With 'name' as the index, a row can be selected by the person's name;
# the row comes back as a Series keyed by the remaining column names.
df.loc['Mike']

age            30
job    Programmer
Name: Mike, dtype: object

## index alignment when performing operations
Both DataFrames get the automatic index 0, 1, 2, so their values are summed row by row.

In [39]:
# Neither frame is given an index, so both are labelled 0, 1, 2 and the
# addition pairs the rows up in order.
df1 = pd.DataFrame({'a': [1, 2, 3]})
df2 = pd.DataFrame({'a': [10, 20, 30]})
df1 + df2


Unnamed: 0,a
0,11
1,22
2,33


The values of the two dfs are paired by index label, not by row position:

`(df1 + df2).loc[n, 'a'] == df1.loc[n, 'a'] + df2.loc[n, 'a']`

In [40]:
# df1 lists its labels in ascending order:
# label 0 -> 1, label 1 -> 2, label 2 -> 3
df1 = pd.DataFrame({
    'a': [1, 2, 3]
}, index=[0, 1, 2])

# df2 lists its labels in reverse order:
# label 2 -> 10, label 1 -> 20, label 0 -> 30
df2 = pd.DataFrame({
    'a': [10, 20, 30]
}, index=[2, 1, 0])

# Addition aligns rows on the index label, not on position:
# label 0: 1 + 30, label 1: 2 + 20, label 2: 3 + 10
df1 + df2

Unnamed: 0,a
0,31
1,22
2,13


## export df
### reset index

In [41]:
# Move the 'name' index back into an ordinary column and restore the
# default 0, 1, 2 row labels before exporting.
# NOTE: not idempotent - running this cell again on the already-reset frame
# inserts the old 0, 1, 2 labels as an extra 'index' column.
df = df.reset_index()
df

Unnamed: 0,name,age,job
0,Mike,30,Programmer
1,Bob,80,Clerk
2,Alice,45,Designer


### export to csv
#### with unnamed index

In [42]:
# The index is written by default, so the row labels 0, 1, 2 end up as a
# first CSV column whose header is empty.
df.to_csv('data_with_unnamed_index.csv')

#### reimport with unnamed index

In [43]:
# Without index_col the saved row labels are read back as a regular data
# column, which shows up under the name 'Unnamed: 0'.
dfin = pd.read_csv('data_with_unnamed_index.csv')
dfin

Unnamed: 0.1,Unnamed: 0,name,age,job
0,0,Mike,30,Programmer
1,1,Bob,80,Clerk
2,2,Alice,45,Designer


#### reimport without unnamed index

In [44]:
# index_col=0 uses the first CSV column as the row index instead of
# loading it as a data column.
dfin = pd.read_csv('data_with_unnamed_index.csv', index_col=0)
dfin

Unnamed: 0,name,age,job
0,Mike,30,Programmer
1,Bob,80,Clerk
2,Alice,45,Designer


#### export without unnamed index

In [45]:
# index=False leaves the row labels out of the file, so no unnamed first
# column appears when the CSV is read back. (The parameter is documented as
# a bool; False states the intent explicitly instead of relying on None
# being falsy.)
df.to_csv('data_without_unnamed_index.csv', index=False)

#### reimport 

In [46]:
# The file contains no index column, so read_csv assigns fresh 0, 1, 2 row
# labels and no 'Unnamed: 0' column appears.
dfin = pd.read_csv('data_without_unnamed_index.csv')
dfin

Unnamed: 0,name,age,job
0,Mike,30,Programmer
1,Bob,80,Clerk
2,Alice,45,Designer


### export to json

In [47]:
# Write the frame to data.json, pretty-printed with an indent of 4 spaces.
df.to_json('data.json', indent=4)

### export to dict

In [48]:
# With no arguments the result is nested as {column -> {row label -> value}}.
df.to_dict()

{'name': {0: 'Mike', 1: 'Bob', 2: 'Alice'},
 'age': {0: 30, 1: 80, 2: 45},
 'job': {0: 'Programmer', 1: 'Clerk', 2: 'Designer'}}