In [24]:
import numpy as np
import pandas as pd

In [25]:
s = pd.Series([1, 3, 5, np.nan, 6, 8])
s

0    1.0
1    3.0
2    5.0
3    NaN
4    6.0
5    8.0
dtype: float64

In [26]:
dates = pd.date_range('20130101', periods=6)
dates

DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
               '2013-01-05', '2013-01-06'],
              dtype='datetime64[ns]', freq='D')

In [27]:
df2 = pd.DataFrame(np.random.randn(6, 4), index=dates, columns=list('ABCD'))
df2

Unnamed: 0,A,B,C,D
2013-01-01,-0.397386,-0.715087,-0.391808,-1.314698
2013-01-02,0.870462,-0.457835,1.410996,-0.474453
2013-01-03,0.623159,0.293814,-0.888933,-1.969085
2013-01-04,-1.298511,-0.920075,-0.588453,0.04662
2013-01-05,1.299773,1.978359,1.335986,-1.654436
2013-01-06,1.039364,0.043319,0.671977,-1.979758


In [28]:
df2 = pd.DataFrame({'A': range(4),
...: 'B': pd.Timestamp('20130102'),
...: 'C': pd.Series(np.arange(4), index=list(range(4)), dtype='float32'),
...: 'D': np.array([3] * 4, dtype='int32'),
...: 'E': pd.Categorical(["test", "train", "test", "train"]),
...: 'F': 'foo'})

df2

Unnamed: 0,A,B,C,D,E,F
0,0,2013-01-02,0.0,3,test,foo
1,1,2013-01-02,1.0,3,train,foo
2,2,2013-01-02,2.0,3,test,foo
3,3,2013-01-02,3.0,3,train,foo


In [29]:
np.arange(1,23)

array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,
       18, 19, 20, 21, 22])

In [30]:
s=pd.Categorical(["Yogesh","Mukesh","Hari","Neema"])
s

[Yogesh, Mukesh, Hari, Neema]
Categories (4, object): [Hari, Mukesh, Neema, Yogesh]

In [31]:
p = pd.Categorical(["Student","Student","Government Job","HouseWife"])
p

[Student, Student, Government Job, HouseWife]
Categories (3, object): [Government Job, HouseWife, Student]

In [32]:
df = pd.DataFrame({"SNo.":np.arange(1,5),
                  "Name":s,
                  "Age":np.array([19,15,49,42]),
                  "Profession":p
                  })
df


Unnamed: 0,SNo.,Name,Age,Profession
0,1,Yogesh,19,Student
1,2,Mukesh,15,Student
2,3,Hari,49,Government Job
3,4,Neema,42,HouseWife


In [33]:
df.columns


Index(['SNo.', 'Name', 'Age', 'Profession'], dtype='object')

In [34]:
df.to_numpy() #each row converted to an array


array([[1, 'Yogesh', 19, 'Student'],
       [2, 'Mukesh', 15, 'Student'],
       [3, 'Hari', 49, 'Government Job'],
       [4, 'Neema', 42, 'HouseWife']], dtype=object)

In [35]:
df.describe()   #describe() shows a quick statistic summary of your data:

Unnamed: 0,SNo.,Age
count,4.0,4.0
mean,2.5,31.25
std,1.290994,16.780445
min,1.0,15.0
25%,1.75,18.0
50%,2.5,30.5
75%,3.25,43.75
max,4.0,49.0


In [36]:
df.T #Transpose the DataFrame

Unnamed: 0,0,1,2,3
SNo.,1,2,3,4
Name,Yogesh,Mukesh,Hari,Neema
Age,19,15,49,42
Profession,Student,Student,Government Job,HouseWife


In [37]:
df.sort_index(axis=1, ascending=False)  #sort on the basis of axis(1=>x,0=>y)

Unnamed: 0,SNo.,Profession,Name,Age
0,1,Student,Yogesh,19
1,2,Student,Mukesh,15
2,3,Government Job,Hari,49
3,4,HouseWife,Neema,42


In [38]:
df.iloc[3] 

SNo.                  4
Name              Neema
Age                  42
Profession    HouseWife
Name: 3, dtype: object

In [39]:
df.iloc[0:4, 0:2]   

Unnamed: 0,SNo.,Name
0,1,Yogesh
1,2,Mukesh
2,3,Hari
3,4,Neema


In [40]:
df.iloc[[1, 2, 3], [0, 2]]

Unnamed: 0,SNo.,Age
1,2,15
2,3,49
3,4,42


In [41]:
df.iloc[1:3, :]

Unnamed: 0,SNo.,Name,Age,Profession
1,2,Mukesh,15,Student
2,3,Hari,49,Government Job


In [42]:
df.iloc[:,0:3]

Unnamed: 0,SNo.,Name,Age
0,1,Yogesh,19
1,2,Mukesh,15
2,3,Hari,49
3,4,Neema,42


In [43]:
df.iloc[1, 0]

2

In [44]:
df[df['SNo.'] <3]

Unnamed: 0,SNo.,Name,Age,Profession
0,1,Yogesh,19,Student
1,2,Mukesh,15,Student


In [45]:
df[df['Age']<19]

Unnamed: 0,SNo.,Name,Age,Profession
1,2,Mukesh,15,Student


In [46]:
df2 = df.copy()
df2['Earning']=[2000,0,300000,0]
df2

Unnamed: 0,SNo.,Name,Age,Profession,Earning
0,1,Yogesh,19,Student,2000
1,2,Mukesh,15,Student,0
2,3,Hari,49,Government Job,300000
3,4,Neema,42,HouseWife,0


In [47]:
df2[df2['Earning'].isin([2000, 300000])] #isin() method for filtering:

Unnamed: 0,SNo.,Name,Age,Profession,Earning
0,1,Yogesh,19,Student,2000
2,3,Hari,49,Government Job,300000


In [48]:
df2[df2['Name'].isin(["Yogesh"])]   

Unnamed: 0,SNo.,Name,Age,Profession,Earning
0,1,Yogesh,19,Student,2000


In [49]:
df.iat[0, 0] = 0   #indexat 0,0
df

Unnamed: 0,SNo.,Name,Age,Profession
0,0,Yogesh,19,Student
1,2,Mukesh,15,Student
2,3,Hari,49,Government Job
3,4,Neema,42,HouseWife


In [50]:
df.loc[:, 'Age'] = np.array([19,15,49,42])  #Selecting on a multi-axis by label:
df

Unnamed: 0,SNo.,Name,Age,Profession
0,0,Yogesh,19,Student
1,2,Mukesh,15,Student
2,3,Hari,49,Government Job
3,4,Neema,42,HouseWife


In [51]:
df.loc[0:2,['Name','Age']]  #Specific column data based on range of index

Unnamed: 0,Name,Age
0,Yogesh,19
1,Mukesh,15
2,Hari,49


In [52]:
df.loc[0, ['Name', 'Profession']]   #specific column data based on particular index

Name           Yogesh
Profession    Student
Name: 0, dtype: object

In [53]:
df3= pd.DataFrame({"Cost":np.array([24.5,4.5,7.0]),
                 "Items Purchased":pd.Categorical(["Dog Food","Kity Litter","Bird Seed"]),
                 "Name":pd.Categorical(["Chris","Keyn","Vinod"]),}
                 )
df3

Unnamed: 0,Cost,Items Purchased,Name
0,24.5,Dog Food,Chris
1,4.5,Kity Litter,Keyn
2,7.0,Bird Seed,Vinod


In [54]:
ind = pd.Categorical(["Store 1","Store 2","Store 3"])
df3.index=ind
df3

Unnamed: 0,Cost,Items Purchased,Name
Store 1,24.5,Dog Food,Chris
Store 2,4.5,Kity Litter,Keyn
Store 3,7.0,Bird Seed,Vinod


In [55]:
df3.drop('Store 1')

Unnamed: 0,Cost,Items Purchased,Name
Store 2,4.5,Kity Litter,Keyn
Store 3,7.0,Bird Seed,Vinod


In [56]:
df3

Unnamed: 0,Cost,Items Purchased,Name
Store 1,24.5,Dog Food,Chris
Store 2,4.5,Kity Litter,Keyn
Store 3,7.0,Bird Seed,Vinod


In [57]:
copy=df3.copy()
copy=copy.drop('Store 3')
copy

Unnamed: 0,Cost,Items Purchased,Name
Store 1,24.5,Dog Food,Chris
Store 2,4.5,Kity Litter,Keyn


In [58]:
df3["Location"]= [1,2,3]
df3

Unnamed: 0,Cost,Items Purchased,Name,Location
Store 1,24.5,Dog Food,Chris,1
Store 2,4.5,Kity Litter,Keyn,2
Store 3,7.0,Bird Seed,Vinod,3


In [59]:
df3.mean()

Cost        12.0
Location     2.0
dtype: float64

In [60]:
df3.mean(1)  #horizontal mean

Store 1    12.75
Store 2     3.25
Store 3     5.00
dtype: float64

In [61]:
s = pd.Series([1, 3, 5, np.nan, 6, 8], index=dates).shift(2)
s

2013-01-01    NaN
2013-01-02    NaN
2013-01-03    1.0
2013-01-04    3.0
2013-01-05    5.0
2013-01-06    NaN
Freq: D, dtype: float64

In [62]:
df3.sub(s, axis='index')

  return self._values.ravel(order=order)


Unnamed: 0,Cost,Items Purchased,Name,Location
2013-01-01 00:00:00,,,,
2013-01-02 00:00:00,,,,
2013-01-03 00:00:00,,,,
2013-01-04 00:00:00,,,,
2013-01-05 00:00:00,,,,
2013-01-06 00:00:00,,,,
Store 1,,,,
Store 2,,,,
Store 3,,,,


In [63]:
df3.apply(np.cumsum)


Unnamed: 0,Cost,Items Purchased,Name,Location
Store 1,24.5,Dog Food,Chris,1
Store 2,29.0,Dog FoodKity Litter,ChrisKeyn,3
Store 3,36.0,Dog FoodKity LitterBird Seed,ChrisKeynVinod,6


In [64]:
df3

Unnamed: 0,Cost,Items Purchased,Name,Location
Store 1,24.5,Dog Food,Chris,1
Store 2,4.5,Kity Litter,Keyn,2
Store 3,7.0,Bird Seed,Vinod,3


In [293]:
df4=pd.DataFrame({"yogesh":np.array([2,3,54,5])})
df4.index=[1,2,3,4]
df4

Unnamed: 0,yogesh
1,2
2,3
3,54
4,5


In [295]:
df3.to_excel("D:\Data Science\l.xlsx")

In [66]:
df4.apply(lambda y: y.max() - y.min())
df4.apply(lambda x:x*4)

Unnamed: 0,1
1,8
2,12
3,216
4,20


In [249]:
df5=df4.where(df4["yogesh"]>51)  #where is used to boolean mask the data
df5                        

Unnamed: 0,yogesh
1,
2,
3,54.0
4,


In [68]:
df5=df5.dropna()
df5

Unnamed: 0,1
3,54.0


In [95]:
(df3['Cost']<7.0)| df3['Location']>1

Store 1    False
Store 2    False
Store 3    False
dtype: bool

In [112]:
ss = pd.Series(np.random.randint(0, 3, size=10))
ss

0    1
1    1
2    2
3    2
4    2
5    2
6    2
7    1
8    0
9    0
dtype: int32

In [128]:
ss.value_counts()  #count the number of occurence of any dtype

2    5
1    3
0    2
dtype: int64

In [130]:
sb = pd.Series(['A', 'B', 'C', 'Aaba', 'Baca', np.nan, 'CABA', 'dog', 'cat'])
sb

0       A
1       B
2       C
3    Aaba
4    Baca
5     NaN
6    CABA
7     dog
8     cat
dtype: object

In [146]:
# sb.str.lower()  #convert to lowercase
# sb.str.upper()  #convert to uppercase
# sb.str.capitalize()  #capitilize the first letter
# sb.str.encode('UTF-8')  #encode the string to a format

In [171]:
n=pd.Categorical(['Yogesh','Mukesh','Shrey','Pramod'])
dsa=pd.DataFrame({"Name":n})
dsa.index=[1,2,3,4]
pieces = [dsa[:1],dsa[1:3]]
pd.concat(pieces)   #concat used to concat the pices of DataFrame into single unit


Unnamed: 0,Name
1,Yogesh
2,Mukesh
3,Shrey


# DEMONSTRATION WITH REAL DATA

In [284]:
df6=pd.read_csv('test.csv')
r=df6['Series_title_1']
r.value_counts()
df7= df6.copy()
df7=df7.fillna(1.9) #fill with NaN with a number
df7.index = np.arange(1,3793)
df7
df7.T
df7.memory_usage()
df7.apply(lambda x: x[2]*4)
s=pd.Series(df7['Data_value'])
df7['Data_value']=s*4
df7

Unnamed: 0,Series_reference,Period,Data_value,Suppressed,STATUS,UNITS,Magnitude,Subject,Group,Series_title_1,Series_title_2,Series_title_3,Series_title_4,Series_title_5
1,ECTM.S1AG1210,2007.03,925.1925.1925.1925.1,1.9,F,Dollars,6,Electronic Card Transactions (ANZSIC06) - ECT,Private - Values - Electronic card transaction...,Actual,Supermarket and grocery stores,1.9,1.9,1.9
2,ECTM.S1AG1210,2007.04,880880880880,1.9,F,Dollars,6,Electronic Card Transactions (ANZSIC06) - ECT,Private - Values - Electronic card transaction...,Actual,Supermarket and grocery stores,1.9,1.9,1.9
3,ECTM.S1AG1210,2007.05,905.1905.1905.1905.1,1.9,F,Dollars,6,Electronic Card Transactions (ANZSIC06) - ECT,Private - Values - Electronic card transaction...,Actual,Supermarket and grocery stores,1.9,1.9,1.9
4,ECTM.S1AG1210,2007.06,874.3874.3874.3874.3,1.9,F,Dollars,6,Electronic Card Transactions (ANZSIC06) - ECT,Private - Values - Electronic card transaction...,Actual,Supermarket and grocery stores,1.9,1.9,1.9
5,ECTM.S1AG1210,2007.07,890.5890.5890.5890.5,1.9,F,Dollars,6,Electronic Card Transactions (ANZSIC06) - ECT,Private - Values - Electronic card transaction...,Actual,Supermarket and grocery stores,1.9,1.9,1.9
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3788,ECTM.S1SQQ112,2019.12,210.2210.2210.2210.2,1.9,R,Dollars,6,Electronic Card Transactions (ANZSIC06) - ECT,Private - Values - Electronic card transaction...,Seasonally adjusted,Medical and Other Health Care Services,1.9,1.9,1.9
3789,ECTM.S1SQQ112,2020.01,208208208208,1.9,R,Dollars,6,Electronic Card Transactions (ANZSIC06) - ECT,Private - Values - Electronic card transaction...,Seasonally adjusted,Medical and Other Health Care Services,1.9,1.9,1.9
3790,ECTM.S1SQQ112,2020.02,192.3192.3192.3192.3,1.9,R,Dollars,6,Electronic Card Transactions (ANZSIC06) - ECT,Private - Values - Electronic card transaction...,Seasonally adjusted,Medical and Other Health Care Services,1.9,1.9,1.9
3791,ECTM.S1SQQ112,2020.03,181.4181.4181.4181.4,1.9,R,Dollars,6,Electronic Card Transactions (ANZSIC06) - ECT,Private - Values - Electronic card transaction...,Seasonally adjusted,Medical and Other Health Care Services,1.9,1.9,1.9
