In [1]:
import pandas as pd
import numpy as np

# Series

In [2]:
pd.Series(np.random.randn(5)) #

0   -0.316041
1    1.154337
2    0.337165
3   -0.367809
4    1.421699
dtype: float64

In [3]:
s = pd.Series(np.random.randn(5), index = ['a1','a2','a3','a4','a5']) #
s

a1   -0.718433
a2   -0.696314
a3   -0.060310
a4    3.484915
a5    1.023656
dtype: float64

In [4]:
d = {"b": 1, "a": 0, "c": 2}
pd.Series(d)

b    1
a    0
c    2
dtype: int64

In [5]:
pd.Series(d, index=["b", "c", "d", "a"])

b    1.0
c    2.0
d    NaN
a    0.0
dtype: float64

In [6]:
pd.Series(111, index=["a", "b", "c", "d", "e"])

a    111
b    111
c    111
d    111
e    111
dtype: int64

In [7]:
pd.Series(111, index=["a", "b", "c", "d", "e"]).to_numpy() #converting into ndarray

array([111, 111, 111, 111, 111], dtype=int64)

In [8]:
s['a3']

-0.06030959555183694

In [9]:
s['a6'] #at this index value is none

KeyError: 'a6'

In [10]:
s.get('a6') #this function does not through any error

In [11]:
s.get('a1')

-0.7184334125719771

## Arithmetic Operation

In [12]:
s

a1   -0.718433
a2   -0.696314
a3   -0.060310
a4    3.484915
a5    1.023656
dtype: float64

In [13]:
s+s

a1   -1.436867
a2   -1.392629
a3   -0.120619
a4    6.969830
a5    2.047312
dtype: float64

In [14]:
s*10

a1    -7.184334
a2    -6.963144
a3    -0.603096
a4    34.849149
a5    10.236562
dtype: float64

In [15]:
s/2

a1   -0.359217
a2   -0.348157
a3   -0.030155
a4    1.742457
a5    0.511828
dtype: float64

In [16]:
s-10

a1   -10.718433
a2   -10.696314
a3   -10.060310
a4    -6.515085
a5    -8.976344
dtype: float64

## Slicing

In [17]:
a = pd.Series(np.random.rand(10))
a

0    0.795348
1    0.455890
2    0.016515
3    0.658011
4    0.569973
5    0.631681
6    0.125549
7    0.801739
8    0.826174
9    0.935977
dtype: float64

In [18]:
a[4:8]

4    0.569973
5    0.631681
6    0.125549
7    0.801739
dtype: float64

In [19]:
a[1:] + a[:-1]

0         NaN
1    0.911779
2    0.033031
3    1.316021
4    1.139947
5    1.263363
6    0.251098
7    1.603479
8    1.652347
9         NaN
dtype: float64

----------------------------

# DataFrame

In [20]:
d = {"one": pd.Series([1.0, 2.0, 3.0], index=["a", "b", "c"]),
      "two": pd.Series([1.0, 2.0, 3.0, 4.0], index=["a", "b", "c", "d"]),
    }

In [21]:
df = pd.DataFrame(d)
df

Unnamed: 0,one,two
a,1.0,1.0
b,2.0,2.0
c,3.0,3.0
d,,4.0


In [22]:
pd.DataFrame(d, index=["d", "b", "a","z"])

Unnamed: 0,one,two
d,,4.0
b,2.0,2.0
a,1.0,1.0
z,,


In [23]:
pd.DataFrame(d, index=["d", "b", "a","z"], columns = ["two", "three", "one", "ten"] )

Unnamed: 0,two,three,one,ten
d,4.0,,,
b,2.0,,2.0,
a,1.0,,1.0,
z,,,,


In [24]:
df.index #row names

Index(['a', 'b', 'c', 'd'], dtype='object')

In [25]:
df.columns#columns name

Index(['one', 'two'], dtype='object')

---------------------

In [26]:
dict = {
    'one':[1,11,111,1111,11111],
    'two':[2,22,222,2222,22222],
    'three':[3,33,333,3333,33333],
    'four':[4,44,444,4444,44444],
    'five':[5,55,555,5555,55555]
}

In [27]:
df = pd.DataFrame(dict)
df

Unnamed: 0,one,two,three,four,five
0,1,2,3,4,5
1,11,22,33,44,55
2,111,222,333,444,555
3,1111,2222,3333,4444,5555
4,11111,22222,33333,44444,55555


In [28]:
df.head(3)

Unnamed: 0,one,two,three,four,five
0,1,2,3,4,5
1,11,22,33,44,55
2,111,222,333,444,555


In [29]:
df.tail(2)

Unnamed: 0,one,two,three,four,five
3,1111,2222,3333,4444,5555
4,11111,22222,33333,44444,55555


In [30]:
df.describe()

Unnamed: 0,one,two,three,four,five
count,5.0,5.0,5.0,5.0,5.0
mean,2469.0,4938.0,7407.0,9876.0,12345.0
std,4853.382326,9706.764652,14560.146977,19413.529303,24266.911629
min,1.0,2.0,3.0,4.0,5.0
25%,11.0,22.0,33.0,44.0,55.0
50%,111.0,222.0,333.0,444.0,555.0
75%,1111.0,2222.0,3333.0,4444.0,5555.0
max,11111.0,22222.0,33333.0,44444.0,55555.0


In [31]:
df.to_csv("number.csv") #save the csv

In [32]:
calander = pd.read_csv("calander.csv") #read any csv

In [33]:
calander

Unnamed: 0,sun,mon,tue,wed,thu,fri,sat
0,1,2,3,4,5,6,7
1,8,9,10,11,12,13,14
2,15,16,17,18,19,20,21
3,22,23,24,25,26,27,28


In [34]:
calander.index = ['week1','week2','week3','week4'] #changing index name
calander.columns = ['SUN','MON','TUE','WED','THURS','FRI','SAT'] #changing columns

In [35]:
calander

Unnamed: 0,SUN,MON,TUE,WED,THURS,FRI,SAT
week1,1,2,3,4,5,6,7
week2,8,9,10,11,12,13,14
week3,15,16,17,18,19,20,21
week4,22,23,24,25,26,27,28


In [36]:
calander.T

Unnamed: 0,week1,week2,week3,week4
SUN,1,8,15,22
MON,2,9,16,23
TUE,3,10,17,24
WED,4,11,18,25
THURS,5,12,19,26
FRI,6,13,20,27
SAT,7,14,21,28


In [39]:
calander.sort_index(axis=1, ascending=True)

Unnamed: 0,FRI,MON,SAT,SUN,THURS,TUE,WED
week1,6,2,7,1,5,3,4
week2,13,9,14,8,12,10,11
week3,20,16,21,15,19,17,18
week4,27,23,28,22,26,24,25


------------------------

In [76]:
ndf = pd.DataFrame(np.random.rand(300,5))

In [77]:
ndf

Unnamed: 0,0,1,2,3,4
0,0.520999,0.609558,0.115320,0.504631,0.287643
1,0.516316,0.684989,0.573067,0.435173,0.991730
2,0.994052,0.921432,0.715142,0.644942,0.399544
3,0.528942,0.688862,0.395623,0.119175,0.383105
4,0.416541,0.517179,0.357954,0.527374,0.819736
...,...,...,...,...,...
295,0.620130,0.640198,0.696437,0.930883,0.126740
296,0.065883,0.780831,0.773273,0.589305,0.074464
297,0.734549,0.466462,0.111590,0.380867,0.190806
298,0.989246,0.495159,0.007100,0.823159,0.277637


In [78]:
ndf.index = np.arange(10,310)
ndf.columns = np.arange(11,16)

In [79]:
ndf

Unnamed: 0,11,12,13,14,15
10,0.520999,0.609558,0.115320,0.504631,0.287643
11,0.516316,0.684989,0.573067,0.435173,0.991730
12,0.994052,0.921432,0.715142,0.644942,0.399544
13,0.528942,0.688862,0.395623,0.119175,0.383105
14,0.416541,0.517179,0.357954,0.527374,0.819736
...,...,...,...,...,...
305,0.620130,0.640198,0.696437,0.930883,0.126740
306,0.065883,0.780831,0.773273,0.589305,0.074464
307,0.734549,0.466462,0.111590,0.380867,0.190806
308,0.989246,0.495159,0.007100,0.823159,0.277637


In [80]:
ndf.head()

Unnamed: 0,11,12,13,14,15
10,0.520999,0.609558,0.11532,0.504631,0.287643
11,0.516316,0.684989,0.573067,0.435173,0.99173
12,0.994052,0.921432,0.715142,0.644942,0.399544
13,0.528942,0.688862,0.395623,0.119175,0.383105
14,0.416541,0.517179,0.357954,0.527374,0.819736


In [81]:
ndf1 = ndf

In [82]:
ndf1[11][11] = 'obj'

In [83]:
ndf.head()

Unnamed: 0,11,12,13,14,15
10,0.520999,0.609558,0.11532,0.504631,0.287643
11,obj,0.684989,0.573067,0.435173,0.99173
12,0.994052,0.921432,0.715142,0.644942,0.399544
13,0.528942,0.688862,0.395623,0.119175,0.383105
14,0.416541,0.517179,0.357954,0.527374,0.819736


In [84]:
ndf2 = ndf.copy()

In [85]:
ndf2[11][11] = "hii"

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ndf2[11][11] = "hii"


In [88]:
ndf.head()

Unnamed: 0,11,12,13,14,15
10,0.520999,0.609558,0.11532,0.504631,0.287643
11,obj,0.684989,0.573067,0.435173,0.99173
12,0.994052,0.921432,0.715142,0.644942,0.399544
13,0.528942,0.688862,0.395623,0.119175,0.383105
14,0.416541,0.517179,0.357954,0.527374,0.819736


In [89]:
ndf.dtypes

11     object
12    float64
13    float64
14    float64
15    float64
dtype: object

In [92]:
ndf.loc[12,12] = int(555)

In [95]:
ndf.head()

Unnamed: 0,11,12,13,14,15
10,0.520999,0.609558,0.11532,0.504631,0.287643
11,obj,0.684989,0.573067,0.435173,0.99173
12,0.994052,555.0,0.715142,0.644942,0.399544
13,0.528942,0.688862,0.395623,0.119175,0.383105
14,0.416541,0.517179,0.357954,0.527374,0.819736


In [96]:
ndf.drop(11)#removing row

Unnamed: 0,11,12,13,14,15
10,0.520999,0.609558,0.115320,0.504631,0.287643
12,0.994052,555.000000,0.715142,0.644942,0.399544
13,0.528942,0.688862,0.395623,0.119175,0.383105
14,0.416541,0.517179,0.357954,0.527374,0.819736
15,0.285983,0.389864,0.168212,0.018877,0.309620
...,...,...,...,...,...
305,0.62013,0.640198,0.696437,0.930883,0.126740
306,0.065883,0.780831,0.773273,0.589305,0.074464
307,0.734549,0.466462,0.111590,0.380867,0.190806
308,0.989246,0.495159,0.007100,0.823159,0.277637


In [97]:
ndf.drop(12,axis = 1)

Unnamed: 0,11,13,14,15
10,0.520999,0.115320,0.504631,0.287643
11,obj,0.573067,0.435173,0.991730
12,0.994052,0.715142,0.644942,0.399544
13,0.528942,0.395623,0.119175,0.383105
14,0.416541,0.357954,0.527374,0.819736
...,...,...,...,...
305,0.62013,0.696437,0.930883,0.126740
306,0.065883,0.773273,0.589305,0.074464
307,0.734549,0.111590,0.380867,0.190806
308,0.989246,0.007100,0.823159,0.277637


In [99]:
ndf.loc[[14,16],[13,15]] #name of index and columns

Unnamed: 0,13,15
14,0.357954,0.819736
16,0.472731,0.380721


In [103]:
ndf.head(10)

Unnamed: 0,11,12,13,14,15
10,0.520999,0.609558,0.11532,0.504631,0.287643
11,obj,0.684989,0.573067,0.435173,0.99173
12,0.994052,555.0,0.715142,0.644942,0.399544
13,0.528942,0.688862,0.395623,0.119175,0.383105
14,0.416541,0.517179,0.357954,0.527374,0.819736
15,0.285983,0.389864,0.168212,0.018877,0.30962
16,0.728964,0.058689,0.472731,0.148497,0.380721
17,0.112722,0.044315,0.173259,0.575639,0.500378
18,0.890054,0.738436,0.525604,0.285546,0.461437
19,0.090467,0.624978,0.625836,0.488846,0.925675


In [107]:
ndf.iloc[0,0] #index number of of row and columns

0.5209990137275688

In [112]:
ndf.head()

Unnamed: 0,11,12,13,14,15
10,0.520999,0.609558,0.11532,0.504631,0.287643
11,obj,0.684989,0.573067,0.435173,0.99173
12,0.994052,555.0,0.715142,0.644942,0.399544
13,0.528942,0.688862,0.395623,0.119175,0.383105
14,0.416541,0.517179,0.357954,0.527374,0.819736


In [113]:
ndf.reset_index()

Unnamed: 0,index,11,12,13,14,15
0,10,0.520999,0.609558,0.115320,0.504631,0.287643
1,11,obj,0.684989,0.573067,0.435173,0.991730
2,12,0.994052,555.000000,0.715142,0.644942,0.399544
3,13,0.528942,0.688862,0.395623,0.119175,0.383105
4,14,0.416541,0.517179,0.357954,0.527374,0.819736
...,...,...,...,...,...,...
295,305,0.62013,0.640198,0.696437,0.930883,0.126740
296,306,0.065883,0.780831,0.773273,0.589305,0.074464
297,307,0.734549,0.466462,0.111590,0.380867,0.190806
298,308,0.989246,0.495159,0.007100,0.823159,0.277637


In [114]:
ndf.reset_index(drop = True)

Unnamed: 0,11,12,13,14,15
0,0.520999,0.609558,0.115320,0.504631,0.287643
1,obj,0.684989,0.573067,0.435173,0.991730
2,0.994052,555.000000,0.715142,0.644942,0.399544
3,0.528942,0.688862,0.395623,0.119175,0.383105
4,0.416541,0.517179,0.357954,0.527374,0.819736
...,...,...,...,...,...
295,0.62013,0.640198,0.696437,0.930883,0.126740
296,0.065883,0.780831,0.773273,0.589305,0.074464
297,0.734549,0.466462,0.111590,0.380867,0.190806
298,0.989246,0.495159,0.007100,0.823159,0.277637


In [115]:
ndf.shape 

(300, 5)

In [116]:
ndf.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 300 entries, 10 to 309
Data columns (total 5 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   11      300 non-null    object 
 1   12      300 non-null    float64
 2   13      300 non-null    float64
 3   14      300 non-null    float64
 4   15      300 non-null    float64
dtypes: float64(4), object(1)
memory usage: 22.2+ KB


In [123]:
ndf[15].value_counts(dropna=False)

0.287643    1
0.902672    1
0.566178    1
0.527977    1
0.927615    1
           ..
0.307204    1
0.645728    1
0.184432    1
0.033662    1
0.335707    1
Name: 15, Length: 300, dtype: int64

In [124]:
ndf.notnull()

Unnamed: 0,11,12,13,14,15
10,True,True,True,True,True
11,True,True,True,True,True
12,True,True,True,True,True
13,True,True,True,True,True
14,True,True,True,True,True
...,...,...,...,...,...
305,True,True,True,True,True
306,True,True,True,True,True
307,True,True,True,True,True
308,True,True,True,True,True


In [125]:
ndf.isnull()

Unnamed: 0,11,12,13,14,15
10,False,False,False,False,False
11,False,False,False,False,False
12,False,False,False,False,False
13,False,False,False,False,False
14,False,False,False,False,False
...,...,...,...,...,...
305,False,False,False,False,False
306,False,False,False,False,False
307,False,False,False,False,False
308,False,False,False,False,False
