In [2]:
import pandas as pd

# https://pandas.pydata.org/pandas-docs/stable/

# https://matplotlib.org/index.html for visualisations


In [3]:
# Part 1: Series

# 1. 1-D data
# 2. Index
# 3. Dictionary input


In [4]:
# Start with an empty Series; the dtype is given explicitly.
series = pd.Series(dtype="float64")
print(f"{series}\n")

Series([], dtype: float64)



In [5]:
# A single scalar yields a one-element Series labelled 0.
series = pd.Series(data=5)
print(f"{series}\n")

0    5
dtype: int64



In [7]:
# A list yields one element per item, labelled 0..n-1.
series = pd.Series(data=[1, 2, 3])
print(f"{series}\n")


0    1
1    2
2    3
dtype: int64



In [9]:
import numpy as np

# Integer-only input keeps an integer dtype.
series = pd.Series(data=[1, 2, 3])
print(f"{series}\n")

# Mixing an int with a float upcasts the whole Series to float64.
series = pd.Series(data=[1, 2.2])
print(f"{series}\n")

# Most useful form: wrap a NumPy array and request the dtype explicitly.
arr = np.asarray([1, 2])
series = pd.Series(data=arr, dtype="float32")
print(f"{series}\n")

# Nested lists are stored as Python objects, one inner list per element.
series = pd.Series(data=[[1, 2], [3, 4]])
print(f"{series}\n")

0    1
1    2
2    3
dtype: int64

0    1.0
1    2.2
dtype: float64

0    1.0
1    2.0
dtype: float32

0    [1, 2]
1    [3, 4]
dtype: object



In [10]:
# 2. Index
# Custom labels replace the default 0..n-1 positions.
series = pd.Series(data=[1, 2, 3], index=list("abc"))
print(f"{series}\n")

a    1
b    2
c    3
dtype: int64



In [12]:
# Index labels may be of any hashable type, even mixed within one index.
series = pd.Series(data=[1, 2, 3], index=["a", 8, 0.3])
print(f"{series}\n")

a      1
8      2
0.3    3
dtype: int64



In [13]:
# 3. Dictionary input
# Index labels must be hashable, so a dict maps naturally onto a Series:
# the keys become the index and the values become the data.
series = pd.Series(data=dict(a=1, b=2, c=3))
print(f"{series!r}")

a    1
b    2
c    3
dtype: int64


In [14]:
# The dict's key order is kept as-is; the index is not sorted.
series = pd.Series(data=dict(b=2, a=1, c=3))
print(f"{series!r}")

b    2
a    1
c    3
dtype: int64


In [15]:
# Backbones of pandas

# Part 2: DataFrame

# 1. 2-D data
# 2. Upcasting 
# 3. Appending rows
# 4. Dropping data

import pandas as pd
import numpy as np

In [16]:
# 1. 2-D data
# A DataFrame holds tabular (two-dimensional) data; with no arguments it is empty.
df = pd.DataFrame()
print(f"{df!r}")


Empty DataFrame
Columns: []
Index: []


In [17]:
# A flat list becomes a single column: one row per item.
df = pd.DataFrame(data=[5, 6])
print(f"{df!r}")

   0
0  5
1  6


In [19]:
# A list of lists is read row by row: one inner list gives one row.
df = pd.DataFrame(data=[[5, 6]])
print(f"{df!r}")

   0  1
0  5  6


In [20]:
# Row and column labels can be supplied alongside the data.
df = pd.DataFrame(
    data=[[5, 6], [1, 3]],
    index=["r1", "r2"],
    columns=["c1", "c2"],
)
print(f"{df!r}")

    c1  c2
r1   5   6
r2   1   3


In [22]:
# With dict input, the keys become the column labels and each value list
# fills that column from top to bottom.
data = dict(c1=[1, 2], c2=[3, 4])
df = pd.DataFrame(data=data, index=["r1", "r2"])
print(f"{df!r}")

    c1  c2
r1   1   3
r2   2   4


In [23]:
# 2. Upcasting
# Upcasting is applied per column: only column 0 contains a float (1.2),
# so only that column becomes float64 while column 1 stays integer.
upcast = pd.DataFrame(data=[[5, 6], [1.2, 3]])
print(f"{upcast!r}")
print(upcast.dtypes)

     0  1
0  5.0  6
1  1.2  3
0    float64
1      int64
dtype: object


In [2]:
import pandas as pd
# 3. Appending rows
# DataFrame.append was removed in pandas 2.0 (April 2023); use pd.concat.
df = pd.DataFrame(data=[[5, 6], [1.2, 3]])

# The Series is indexed by the frame's columns; its name becomes the row label.
ser = pd.Series(data=[0, 0], index=df.columns, name="r3")

# Turn the Series into a one-row frame, then stack it under the original.
df_app = pd.concat(objs=[df, ser.to_frame().transpose()])
print(f"{df_app!r}")


      0  1
0   5.0  6
1   1.2  3
r3  0.0  0


In [7]:
# Appending several rows at once: stack a second frame under the first.
df = pd.DataFrame(data=[[5, 6], [1.2, 3]])
df2 = pd.DataFrame(data=[[0, 9], [0, 9]])

# ignore_index=True renumbers the combined rows 0..n-1 instead of
# keeping each frame's own row labels.
df_app = pd.concat(objs=[df, df2], axis=0, ignore_index=True)

print(str(df_app))

     0  1
0  5.0  6
1  1.2  3
2  0.0  9
3  0.0  9


In [8]:
# 4. Dropping data
# A small frame with three columns and two labelled rows to drop from.
df = pd.DataFrame(
    data={"c1": [1, 2], "c2": [3, 4], "c3": [5, 6]},
    index=["r1", "r2"],
)
print(f"{df!r}")

    c1  c2  c3
r1   1   3   5
r2   2   4   6


In [9]:
# `labels` refers to rows here (axis 0 is the default); a new frame is returned.
df_drop = df.drop(labels="r1", axis=0)
print(str(df_drop))

    c1  c2  c3
r2   2   4   6


In [11]:
# With axis=1 the labels are looked up among the columns instead of the rows.
columns_axis = 1
df_drop = df.drop(labels=["c1", "c3"], axis=columns_axis)
print(str(df_drop))

    c2
r1   3
r2   4


In [13]:
# The `index` keyword names the rows to drop directly, with no axis argument.
df_drop = df.drop(index=["r2"])
print(str(df_drop))

    c1  c2  c3
r1   1   3   5


In [14]:
# The `columns` keyword names the columns to drop directly.
df_drop = df.drop(columns=["c2"])
print(str(df_drop))

    c1  c3
r1   1   5
r2   2   6


In [15]:
# `index` and `columns` can be combined to drop rows and columns in one call.
df_drop = df.drop(index=["r2"], columns=["c2"])
print(str(df_drop))

    c1  c3
r1   1   5
