# Essential basic functionality

 On this notebook:
        -->Head and tail
        -->Attributes and underlying data
        -->Accelerated operations
        -->Flexible binary operations
        -->Descriptive statistics
        -->Function application
        -->Reindexing and altering labels
        -->Iteration
        -->.dt accessor
        -->Vectorized string methods
        -->Sorting
        -->Copying
        -->dtypes
        
        Selecting columns based on dtype

In [30]:
import pandas as pd
import numpy as np

In [31]:
index_date=pd.date_range("2023/01/01",periods=12)

In [32]:
index_date

DatetimeIndex(['2023-01-01', '2023-01-02', '2023-01-03', '2023-01-04',
               '2023-01-05', '2023-01-06', '2023-01-07', '2023-01-08',
               '2023-01-09', '2023-01-10', '2023-01-11', '2023-01-12'],
              dtype='datetime64[ns]', freq='D')

In [33]:
s=pd.Series(np.random.randn(12),index_date)
s

2023-01-01   -0.319003
2023-01-02    0.131335
2023-01-03    0.656373
2023-01-04   -0.045181
2023-01-05    0.770868
2023-01-06    1.474934
2023-01-07   -1.426974
2023-01-08    1.195752
2023-01-09    0.484708
2023-01-10   -0.263645
2023-01-11    0.083522
2023-01-12    0.620004
Freq: D, dtype: float64

In [34]:
s=pd.Series(np.random.randn(12),index_date,columns=["date", "random"])
#it shows TypeError . columns work with only DataFrame


TypeError: Series.__init__() got an unexpected keyword argument 'columns'

In [35]:
s_frame=pd.DataFrame(s)

In [36]:
s_frame

Unnamed: 0,0
2023-01-01,-0.319003
2023-01-02,0.131335
2023-01-03,0.656373
2023-01-04,-0.045181
2023-01-05,0.770868
2023-01-06,1.474934
2023-01-07,-1.426974
2023-01-08,1.195752
2023-01-09,0.484708
2023-01-10,-0.263645


Now i can add column name of the data frame are given above

In [37]:
s_frame=pd.DataFrame(np.random.randn(12,1),index=index_date,columns=["Random"])
s_frame

Unnamed: 0,Random
2023-01-01,2.421858
2023-01-02,0.190959
2023-01-03,-1.49066
2023-01-04,1.113109
2023-01-05,0.105594
2023-01-06,0.052087
2023-01-07,0.186278
2023-01-08,-1.863242
2023-01-09,-0.00221
2023-01-10,-0.056097


# head and tail
To view a small sample of a Series or DataFrame object, use the head() and tail() methods. The default number of elements to display is five, but you may pass a custom number.

In [38]:
long_series=pd.Series(np.random.randn(1000))
long_series

0     -0.295037
1     -0.527053
2      1.451945
3      3.141258
4      1.616879
         ...   
995    0.313910
996   -0.786231
997    0.322406
998    0.077873
999    0.739662
Length: 1000, dtype: float64

In [39]:
long_series.head()

0   -0.295037
1   -0.527053
2    1.451945
3    3.141258
4    1.616879
dtype: float64

In [40]:
long_series.tail()

995    0.313910
996   -0.786231
997    0.322406
998    0.077873
999    0.739662
dtype: float64

In [41]:
long_series.head(26)

0    -0.295037
1    -0.527053
2     1.451945
3     3.141258
4     1.616879
5     1.509595
6     0.032897
7    -0.183356
8     0.763997
9     0.200851
10    0.294762
11   -0.328328
12   -1.860734
13    0.652378
14   -0.092707
15   -0.552690
16   -0.170258
17    0.647371
18    1.093145
19    0.234474
20    0.517935
21   -0.051235
22    0.985776
23   -1.926569
24    0.326685
25   -0.016650
dtype: float64

In [42]:
long_series.tail(25)

975   -0.162431
976    0.518196
977    0.925297
978   -0.539110
979   -0.284435
980   -0.130717
981    1.415312
982   -1.166184
983   -0.108363
984    0.878214
985   -0.259211
986    0.174961
987    0.614809
988    1.950286
989   -1.376935
990   -0.329504
991   -1.390098
992   -0.215859
993    0.365131
994   -0.438146
995    0.313910
996   -0.786231
997    0.322406
998    0.077873
999    0.739662
dtype: float64

# Attributes and underlying data

pandas objects have a number of attributes enabling you to access the metadata

shape: gives the axis dimensions of the object, consistent with ndarray

Axis labels
Series: index (only axis)

DataFrame: index (rows) and columns

In [43]:
dff=pd.DataFrame(np.random.randn(12,3),index=index_date,columns=["A","B","C"])
dff

Unnamed: 0,A,B,C
2023-01-01,-1.566418,0.566816,0.894576
2023-01-02,-0.226941,-1.017023,0.177515
2023-01-03,1.825387,0.843581,-1.525787
2023-01-04,1.668802,0.795734,-0.617082
2023-01-05,-0.504954,-0.066633,1.169727
2023-01-06,0.00053,0.104552,-0.030618
2023-01-07,2.126447,-1.516415,0.036115
2023-01-08,-2.944385,0.858343,-1.275327
2023-01-09,-1.468866,0.671318,-0.288361
2023-01-10,1.107461,1.439474,-0.514358


In [44]:
dff[:6]

Unnamed: 0,A,B,C
2023-01-01,-1.566418,0.566816,0.894576
2023-01-02,-0.226941,-1.017023,0.177515
2023-01-03,1.825387,0.843581,-1.525787
2023-01-04,1.668802,0.795734,-0.617082
2023-01-05,-0.504954,-0.066633,1.169727
2023-01-06,0.00053,0.104552,-0.030618


In [45]:
dff[6:]

Unnamed: 0,A,B,C
2023-01-07,2.126447,-1.516415,0.036115
2023-01-08,-2.944385,0.858343,-1.275327
2023-01-09,-1.468866,0.671318,-0.288361
2023-01-10,1.107461,1.439474,-0.514358
2023-01-11,0.975288,-1.488739,-0.111715
2023-01-12,1.198423,0.745187,-1.91487


In [46]:
add=dff[:6]+dff[6:]
add

Unnamed: 0,A,B,C
2023-01-01,,,
2023-01-02,,,
2023-01-03,,,
2023-01-04,,,
2023-01-05,,,
2023-01-06,,,
2023-01-07,,,
2023-01-08,,,
2023-01-09,,,
2023-01-10,,,


In [47]:
dff.columns=[x.lower() for x in dff.columns]

In [48]:
dff

Unnamed: 0,a,b,c
2023-01-01,-1.566418,0.566816,0.894576
2023-01-02,-0.226941,-1.017023,0.177515
2023-01-03,1.825387,0.843581,-1.525787
2023-01-04,1.668802,0.795734,-0.617082
2023-01-05,-0.504954,-0.066633,1.169727
2023-01-06,0.00053,0.104552,-0.030618
2023-01-07,2.126447,-1.516415,0.036115
2023-01-08,-2.944385,0.858343,-1.275327
2023-01-09,-1.468866,0.671318,-0.288361
2023-01-10,1.107461,1.439474,-0.514358


In [49]:
add

Unnamed: 0,A,B,C
2023-01-01,,,
2023-01-02,,,
2023-01-03,,,
2023-01-04,,,
2023-01-05,,,
2023-01-06,,,
2023-01-07,,,
2023-01-08,,,
2023-01-09,,,
2023-01-10,,,


In [50]:
add.columns=[k.lower() for k in add.columns]

In [51]:
add

Unnamed: 0,a,b,c
2023-01-01,,,
2023-01-02,,,
2023-01-03,,,
2023-01-04,,,
2023-01-05,,,
2023-01-06,,,
2023-01-07,,,
2023-01-08,,,
2023-01-09,,,
2023-01-10,,,


In [52]:
dff

Unnamed: 0,a,b,c
2023-01-01,-1.566418,0.566816,0.894576
2023-01-02,-0.226941,-1.017023,0.177515
2023-01-03,1.825387,0.843581,-1.525787
2023-01-04,1.668802,0.795734,-0.617082
2023-01-05,-0.504954,-0.066633,1.169727
2023-01-06,0.00053,0.104552,-0.030618
2023-01-07,2.126447,-1.516415,0.036115
2023-01-08,-2.944385,0.858343,-1.275327
2023-01-09,-1.468866,0.671318,-0.288361
2023-01-10,1.107461,1.439474,-0.514358


In [53]:
dff.dtype

AttributeError: 'DataFrame' object has no attribute 'dtype'

In [None]:
dff.columns=[o.upper() for o in dff.columns]

In [None]:
# for DataFrame
dff.index.array

In [None]:
#for Series

In [None]:
index_date.dtype

In [None]:
index_date.array

In [None]:
s

In [None]:
s.to_numpy()

In [None]:
np.asarray(s)

In [None]:
np.asarray(dff)

In [None]:
dff.to_numpy()

In [None]:
dff.info()

In [None]:
ser = pd.Series(pd.date_range("01/05/2005", periods=2, tz="CET"))
ser

In [None]:
ser.to_numpy(dtype="object")

In [None]:
# or tthrown away datetime64[ns]

In [None]:
ser.to_numpy(dtype="datetime64[ns]")

In [None]:
pd.set_option("compute.use_bottleneck", False)
pd.set_option("compute.use_numexpr", False)

In [None]:
dff

In [54]:
df = pd.DataFrame(
    {
        "one": pd.Series(np.random.randn(3), index=["a", "b", "c"]),
        "two": pd.Series(np.random.randn(4), index=["a", "b", "c", "d"]),
        "three": pd.Series(np.random.randn(3), index=["b", "c", "d"]),
    }
)
df

Unnamed: 0,one,two,three
a,-0.317233,-1.899355,
b,-0.262244,0.263663,-0.452029
c,-0.896267,-1.94138,0.210209
d,,-1.904837,1.955987


In [55]:
row = df.iloc[1]
row

one     -0.262244
two      0.263663
three   -0.452029
Name: b, dtype: float64

In [56]:
column = df["two"]
column

a   -1.899355
b    0.263663
c   -1.941380
d   -1.904837
Name: two, dtype: float64

In [57]:
df.sub(row, axis="columns")

Unnamed: 0,one,two,three
a,-0.054989,-2.163018,
b,0.0,0.0,0.0
c,-0.634023,-2.205044,0.662239
d,,-2.1685,2.408016


# Accelerated operations

These libraries are especially useful when dealing with large data sets, and provide large speedups. numexpr uses smart chunking, caching, and multiple cores. bottleneck is a set of specialized cython routines that are especially fast when dealing with arrays that have nans.

see the setting 
pd.set_option("compute.use_bottleneck", False)
pd.set_option("compute.use_numexpr", False)

In [58]:
pd.set_option("compute.use_bottleneck",False)
pd.set_option("compute.use_numexpr",False)

# Flexible binary operations

With binary operations between pandas data structures, there are two key points of interest:

    1.Broadcasting behavior between higher- (e.g. DataFrame) and 
      lower-dimensional (e.g. Series) objects.

    2.Missing data in computations.

#  broadcasting behavior

    DataFrame has the methods add(), sub(), mul(), div() and related functions radd(), rsub(), … for carrying out binary operations. For broadcasting behavior, Series input is of primary interest. Using these functions, you can use to either match on the index or columns via the axis keyword:

In [59]:
df

Unnamed: 0,one,two,three
a,-0.317233,-1.899355,
b,-0.262244,0.263663,-0.452029
c,-0.896267,-1.94138,0.210209
d,,-1.904837,1.955987


In [60]:
row=df.iloc[:1]

In [61]:
row

Unnamed: 0,one,two,three
a,-0.317233,-1.899355,


In [62]:
#sub
subb=df.sub(row,axis="columns")
subb

Unnamed: 0,one,two,three
a,0.0,0.0,
b,,,
c,,,
d,,,


In [63]:
column=df["one"]
column

a   -0.317233
b   -0.262244
c   -0.896267
d         NaN
Name: one, dtype: float64

In [64]:
Data={
    "Name":["Shakil","Sujon","neloy","sak","pak","jak"],
    "school":["I.E.T Govt high school","I.E.T Govt high school","I.E.T Govt high school","I.E.T Govt high school","I.E.T Govt high school","I.E.T Govt high school"],
    "Gruop" :["Science","Science","Commarce","Science","Science","Commarce"],
    "GPA":[5.00,4.56,4.00,5.00,4.56,4.00],
    
   
}

In [65]:
df=pd.DataFrame(Data)
df

Unnamed: 0,Name,school,Gruop,GPA
0,Shakil,I.E.T Govt high school,Science,5.0
1,Sujon,I.E.T Govt high school,Science,4.56
2,neloy,I.E.T Govt high school,Commarce,4.0
3,sak,I.E.T Govt high school,Science,5.0
4,pak,I.E.T Govt high school,Science,4.56
5,jak,I.E.T Govt high school,Commarce,4.0


In [66]:
pd.Series(data)

NameError: name 'data' is not defined

In [67]:
bruus=pd.DataFrame(Data)
bruus.tail(2) + bruus.head(2)
bruus

Unnamed: 0,Name,school,Gruop,GPA
0,Shakil,I.E.T Govt high school,Science,5.0
1,Sujon,I.E.T Govt high school,Science,4.56
2,neloy,I.E.T Govt high school,Commarce,4.0
3,sak,I.E.T Govt high school,Science,5.0
4,pak,I.E.T Govt high school,Science,4.56
5,jak,I.E.T Govt high school,Commarce,4.0


In [68]:
bruus.tail(2) + bruus.head(2)

Unnamed: 0,Name,school,Gruop,GPA
0,,,,
1,,,,
4,,,,
5,,,,


In [69]:
bruus["GPA"]

0    5.00
1    4.56
2    4.00
3    5.00
4    4.56
5    4.00
Name: GPA, dtype: float64

In [70]:
df = pd.DataFrame(
    {
        "one": pd.Series(np.random.randn(3), index=["a", "b", "c"]),
        "two": pd.Series(np.random.randn(4), index=["a", "b", "c", "d"]),
        "three": pd.Series(np.random.randn(3), index=["b", "c", "d"]),
    }
)
df


Unnamed: 0,one,two,three
a,-0.22636,-1.203044,
b,0.767941,-1.132407,0.640466
c,0.824548,-0.367168,-0.394824
d,,1.540229,-1.414918


In [71]:
row=df.iloc[1]
row

one      0.767941
two     -1.132407
three    0.640466
Name: b, dtype: float64

In [72]:
column = df["two"]


In [73]:
oo=df.sub(row, axis="columns")

In [74]:
df

Unnamed: 0,one,two,three
a,-0.22636,-1.203044,
b,0.767941,-1.132407,0.640466
c,0.824548,-0.367168,-0.394824
d,,1.540229,-1.414918


In [75]:
row

one      0.767941
two     -1.132407
three    0.640466
Name: b, dtype: float64

In [76]:
oo

Unnamed: 0,one,two,three
a,-0.994302,-0.070637,
b,0.0,0.0,0.0
c,0.056606,0.765239,-1.03529
d,,2.672636,-2.055385


In [77]:
df

Unnamed: 0,one,two,three
a,-0.22636,-1.203044,
b,0.767941,-1.132407,0.640466
c,0.824548,-0.367168,-0.394824
d,,1.540229,-1.414918


In [78]:
column=df["two"]

In [79]:
row

one      0.767941
two     -1.132407
three    0.640466
Name: b, dtype: float64

In [80]:
column

a   -1.203044
b   -1.132407
c   -0.367168
d    1.540229
Name: two, dtype: float64

In [81]:
o=df.sub(row,axis="columns")

In [82]:
o

Unnamed: 0,one,two,three
a,-0.994302,-0.070637,
b,0.0,0.0,0.0
c,0.056606,0.765239,-1.03529
d,,2.672636,-2.055385


In [83]:
df

Unnamed: 0,one,two,three
a,-0.22636,-1.203044,
b,0.767941,-1.132407,0.640466
c,0.824548,-0.367168,-0.394824
d,,1.540229,-1.414918


In [84]:
ranDom={
    "one": pd.Series(np.random.randn(3), index=["a", "b", "c"]),
    "two": pd.Series(np.random.randn(3), index=["a","b","c"]),
    "three": pd.Series(np.random.randn(3), index=["a","b","c"]),
}

In [85]:
ranDom=pd.DataFrame(ranDom)

In [86]:
ranDomCopy=ranDom.copy()

In [87]:
ranDomCopy

Unnamed: 0,one,two,three
a,1.982895,0.055095,0.392952
b,-0.235453,0.523515,-2.728555
c,-0.919993,-0.467969,0.835867


In [88]:
ranDom

Unnamed: 0,one,two,three
a,1.982895,0.055095,0.392952
b,-0.235453,0.523515,-2.728555
c,-0.919993,-0.467969,0.835867


In [89]:
rand_row_a=ranDom.iloc[0]

In [90]:
rand_row_a

one      1.982895
two      0.055095
three    0.392952
Name: a, dtype: float64

In [91]:
ranDom=ranDom.sub(rand_row_a,axis=1)

In [92]:
ranDom

Unnamed: 0,one,two,three
a,0.0,0.0,0.0
b,-2.218348,0.46842,-3.121508
c,-2.902888,-0.523063,0.442915


In [93]:
ranDom=ranDomCopy()

TypeError: 'DataFrame' object is not callable

In [94]:
df = pd.DataFrame(
    {
        "one": pd.Series(np.random.randn(3), index=["a", "b", "c"]),
        "two": pd.Series(np.random.randn(4), index=["a", "b", "c", "d"]),
        "three": pd.Series(np.random.randn(3), index=["b", "c", "d"]),
    }
)

In [95]:
dfmi.index = pd.MultiIndex.from_tuples(
    [(1, "a"), (1, "b"), (1, "c"), (2, "a")], names=["first", "second"]
)

NameError: name 'dfmi' is not defined

In [96]:
dfmi.index = pd.MultiIndex.from_tuples(
       [(1,"a"),(1,"b"),(2,"c"),(2,"d")], names=["Label","type"]

)


NameError: name 'dfmi' is not defined

In [97]:
dfmi

NameError: name 'dfmi' is not defined

In [98]:
dfmi.sub(column, axis=0, level="second")



NameError: name 'dfmi' is not defined

In [99]:
s = pd.Series(np.arange(10))


In [100]:
#from chatGTP

In [101]:
import pandas as pd

In [102]:
df1 = pd.DataFrame({'Number': [15, 12, 33]}, index=['Bangla', 'English', 'Math'])
df2=pd.DataFrame({'Number' : [40,55,65]},index=['English','Math','Science'])
df2

Unnamed: 0,Number
English,40
Math,55
Science,65


In [103]:
res=df1+df2
res

Unnamed: 0,Number
Bangla,
English,52.0
Math,88.0
Science,


In [104]:
df_ab = pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]})

In [105]:
df_ab+2

Unnamed: 0,A,B
0,3,6
1,4,7
2,5,8


# Missing data / operations with fill values

In [106]:
s

0    0
1    1
2    2
3    3
4    4
5    5
6    6
7    7
8    8
9    9
dtype: int64

In [107]:
div, rem = divmod(s, 3)


In [108]:
div

0    0
1    0
2    0
3    1
4    1
5    1
6    2
7    2
8    2
9    3
dtype: int64

In [109]:
div

0    0
1    0
2    0
3    1
4    1
5    1
6    2
7    2
8    2
9    3
dtype: int64

In [110]:
idx = pd.Index(np.arange(10))

In [111]:
idx

Int64Index([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], dtype='int64')

In [112]:
div, rem = divmod(idx, 3)


In [113]:
s

0    0
1    1
2    2
3    3
4    4
5    5
6    6
7    7
8    8
9    9
dtype: int64

In [114]:
div, rem = divmod(s, [2, 2, 3, 3, 4, 4, 5, 5, 6, 6])

In [115]:
rem

0    0
1    1
2    2
3    0
4    0
5    1
6    1
7    2
8    2
9    3
dtype: int64

In [116]:
div

0    0
1    0
2    0
3    1
4    1
5    1
6    1
7    1
8    1
9    1
dtype: int64

In [117]:
df2 = df.copy()


In [118]:
df

Unnamed: 0,one,two,three
a,-0.74502,-0.071109,
b,0.229661,-0.647536,0.123159
c,0.046084,-0.055932,0.343584
d,,0.812111,-1.429981


In [119]:
df2

Unnamed: 0,one,two,three
a,-0.74502,-0.071109,
b,0.229661,-0.647536,0.123159
c,0.046084,-0.055932,0.343584
d,,0.812111,-1.429981


In [120]:
df+df2

Unnamed: 0,one,two,three
a,-1.49004,-0.142219,
b,0.459322,-1.295072,0.246318
c,0.092168,-0.111864,0.687168
d,,1.624221,-2.859961


In [121]:
df2["three"]["a"] = 1.0

In [122]:
df2

Unnamed: 0,one,two,three
a,-0.74502,-0.071109,1.0
b,0.229661,-0.647536,0.123159
c,0.046084,-0.055932,0.343584
d,,0.812111,-1.429981


In [123]:
df.add(df2, fill_value=0)


Unnamed: 0,one,two,three
a,-1.49004,-0.142219,1.0
b,0.459322,-1.295072,0.246318
c,0.092168,-0.111864,0.687168
d,,1.624221,-2.859961


# Flexible comparisons

In [124]:
df

Unnamed: 0,one,two,three
a,-0.74502,-0.071109,
b,0.229661,-0.647536,0.123159
c,0.046084,-0.055932,0.343584
d,,0.812111,-1.429981


In [125]:
df["one"]["d"]=2.03

In [126]:
df

Unnamed: 0,one,two,three
a,-0.74502,-0.071109,
b,0.229661,-0.647536,0.123159
c,0.046084,-0.055932,0.343584
d,2.03,0.812111,-1.429981


In [127]:
df2=df.copy()
df2

Unnamed: 0,one,two,three
a,-0.74502,-0.071109,
b,0.229661,-0.647536,0.123159
c,0.046084,-0.055932,0.343584
d,2.03,0.812111,-1.429981


In [128]:
df.gt(df2)

Unnamed: 0,one,two,three
a,False,False,False
b,False,False,False
c,False,False,False
d,False,False,False


In [129]:
df.lt(df2)

Unnamed: 0,one,two,three
a,False,False,False
b,False,False,False
c,False,False,False
d,False,False,False


In [130]:
df.ne(df)

Unnamed: 0,one,two,three
a,False,False,True
b,False,False,False
c,False,False,False
d,False,False,False


# Boolean reductions


In [131]:
df

Unnamed: 0,one,two,three
a,-0.74502,-0.071109,
b,0.229661,-0.647536,0.123159
c,0.046084,-0.055932,0.343584
d,2.03,0.812111,-1.429981


In [132]:
df>0

Unnamed: 0,one,two,three
a,False,False,False
b,True,False,True
c,True,False,True
d,True,True,False


In [133]:
(df>0).all()

one      False
two      False
three    False
dtype: bool

In [134]:
(df>0).any()

one      True
two      True
three    True
dtype: bool

In [135]:
(df>0).any().any()

True

In [136]:
df.empty

False

In [137]:
pd.DataFrame(columns=list("ABC")).empty

True

In [138]:
df

Unnamed: 0,one,two,three
a,-0.74502,-0.071109,
b,0.229661,-0.647536,0.123159
c,0.046084,-0.055932,0.343584
d,2.03,0.812111,-1.429981


# Descriptive statistics

In [140]:
df.mean(0)

one      0.390181
two      0.009383
three   -0.321079
dtype: float64

In [141]:
df.mean(1)

a   -0.408065
b   -0.098239
c    0.111245
d    0.470710
dtype: float64

In [142]:
df.mean(3)

ValueError: No axis named 3 for object type DataFrame

In [144]:
df

Unnamed: 0,one,two,three
a,-0.74502,-0.071109,
b,0.229661,-0.647536,0.123159
c,0.046084,-0.055932,0.343584
d,2.03,0.812111,-1.429981


In [146]:
df.sum(0,skipna=False)

one      1.560725
two      0.037533
three         NaN
dtype: float64

In [149]:
d={
     'one':[10,5,15],
    'two':[12,8,17],
    'three':[15,10,16]
}
flame=pd.DataFrame(d)
flame

Unnamed: 0,one,two,three
0,10,12,15
1,5,8,10
2,15,17,16


In [152]:
s = pd.Series(["a", "a", "b", "b", "a", "a", np.nan, "c", "d", "a"])


In [153]:
s

0      a
1      a
2      b
3      b
4      a
5      a
6    NaN
7      c
8      d
9      a
dtype: object

In [154]:
frame = pd.DataFrame({"a": ["Yes", "Yes", "No", "No"], "b": range(4)})
frame

Unnamed: 0,a,b
0,Yes,0
1,Yes,1
2,No,2
3,No,3


In [155]:
frame.describe()

Unnamed: 0,b
count,4.0
mean,1.5
std,1.290994
min,0.0
25%,0.75
50%,1.5
75%,2.25
max,3.0


In [156]:
frame.describe(include=["object"])

Unnamed: 0,a
count,4
unique,2
top,Yes
freq,2


In [157]:
frame.describe(include=["number"])


Unnamed: 0,b
count,4.0
mean,1.5
std,1.290994
min,0.0
25%,0.75
50%,1.5
75%,2.25
max,3.0


In [160]:
frame.describe(include="all")


Unnamed: 0,a,b
count,4,4.0
unique,2,
top,Yes,
freq,2,
mean,,1.5
std,,1.290994
min,,0.0
25%,,0.75
50%,,1.5
75%,,2.25



# Index of min/max values

The idxmin() and idxmax() functions on Series and DataFrame compute the index labels with the minimum and maximum corresponding values:

In [178]:
sp=pd.Series(np.random.randn(12))

In [179]:
sp

0     0.354339
1    -0.144927
2    -1.190072
3     0.478935
4    -0.093735
5     0.336762
6     0.206070
7    -0.822009
8     0.091064
9    -0.389081
10    0.067496
11   -0.612779
dtype: float64

In [180]:
sp.idxmin()

2