# Analyzing Numeric Pandas Series

In [37]:
import pandas as pd

In [38]:
# Load the Titanic passenger data from 'titanic.csv' (path is relative to the working directory)
titanic = pd.read_csv('titanic.csv')

# Selecting a column to analyze as a Series

In [39]:
# Select the 'age' column; single-bracket selection with one label returns a Series
age = titanic['age']

In [40]:
# Display the Series; pandas truncates the middle of long output, and NaN marks a missing age
age

0      22.0
1      38.0
2      26.0
3      35.0
4      35.0
       ... 
886    27.0
887    19.0
888     NaN
889    26.0
890    32.0
Name: age, Length: 891, dtype: float64

In [14]:
# Summary statistics for the numeric column: count, mean, std, min, quartiles and max.
# count covers only non-missing values, so it can be smaller than the Series length.
age.describe()

count    714.000000
mean      29.699118
std       14.526497
min        0.420000
25%       20.125000
50%       28.000000
75%       38.000000
max       80.000000
Name: age, dtype: float64

In [20]:
# Count how many times each distinct age occurs, most frequent first
# (missing values are excluded by default)
age.value_counts()

24.00    30
22.00    27
18.00    26
19.00    25
28.00    25
         ..
36.50     1
55.50     1
0.92      1
23.50     1
74.00     1
Name: age, Length: 88, dtype: int64

In [22]:
# Mean (average) age of passengers on the Titanic; missing values are skipped
age.mean()

29.69911764705882

In [23]:
# Median age, i.e. the 50th percentile
age.median()

28.0

In [25]:
# Most frequently occurring age value (not an age group).
# mode() returns a Series because several values can tie for most frequent.
age.mode()

0    24.0
Name: age, dtype: float64

In [27]:
# Standard deviation of age: how widely ages are spread around the mean
# (pandas computes the sample standard deviation, ddof=1, by default)
age.std()

14.526497332334044

In [32]:
# Minimum passenger age in years (0.42 years is roughly a five-month-old infant)
age.min()

0.42

In [31]:
# Maximum passenger age on the Titanic (80 years old)
age.max()

80.0

In [36]:
# Returns a NumPy array of the distinct values in the age Series,
# in order of first appearance; NaN is included as a value
age.unique()

array([22.  , 38.  , 26.  , 35.  ,   nan, 54.  ,  2.  , 27.  , 14.  ,
        4.  , 58.  , 20.  , 39.  , 55.  , 31.  , 34.  , 15.  , 28.  ,
        8.  , 19.  , 40.  , 66.  , 42.  , 21.  , 18.  ,  3.  ,  7.  ,
       49.  , 29.  , 65.  , 28.5 ,  5.  , 11.  , 45.  , 17.  , 32.  ,
       16.  , 25.  ,  0.83, 30.  , 33.  , 23.  , 24.  , 46.  , 59.  ,
       71.  , 37.  , 47.  , 14.5 , 70.5 , 32.5 , 12.  ,  9.  , 36.5 ,
       51.  , 55.5 , 40.5 , 44.  ,  1.  , 61.  , 56.  , 50.  , 36.  ,
       45.5 , 20.5 , 62.  , 41.  , 52.  , 63.  , 23.5 ,  0.92, 43.  ,
       60.  , 10.  , 64.  , 13.  , 48.  ,  0.75, 53.  , 57.  , 80.  ,
       70.  , 24.5 ,  6.  ,  0.67, 30.5 ,  0.42, 34.5 , 74.  ])

In [35]:
# Returns the number of distinct values; NaN is not counted by default
age.nunique()

88

In [43]:
# sort=False skips ordering by frequency; here the counts are listed in the
# order each age first appears in the data instead of most-frequent-first
age.value_counts(sort=False)

22.00    27
38.00    11
26.00    18
35.00    18
54.00     8
         ..
0.67      1
30.50     2
0.42      1
34.50     1
74.00     1
Name: age, Length: 88, dtype: int64

In [46]:
# dropna=False keeps missing values, so NaN gets its own row in the counts
age.value_counts(dropna = False)

NaN      177
24.00     30
22.00     27
18.00     26
28.00     25
        ... 
36.50      1
55.50      1
0.92       1
23.50      1
74.00      1
Name: age, Length: 89, dtype: int64

# Analyzing Non-Numeric Pandas Series

In [48]:
# Load 'summer.csv' (medal records with year, city, sport, athlete, country, event and medal columns)
summer = pd.read_csv('summer.csv')

In [49]:
# Display the DataFrame; for a frame this large pandas shows only the first and last rows
summer

Unnamed: 0,Year,City,Sport,Discipline,Athlete,Country,Gender,Event,Medal
0,1896,Athens,Aquatics,Swimming,"HAJOS, Alfred",HUN,Men,100M Freestyle,Gold
1,1896,Athens,Aquatics,Swimming,"HERSCHMANN, Otto",AUT,Men,100M Freestyle,Silver
2,1896,Athens,Aquatics,Swimming,"DRIVAS, Dimitrios",GRE,Men,100M Freestyle For Sailors,Bronze
3,1896,Athens,Aquatics,Swimming,"MALOKINIS, Ioannis",GRE,Men,100M Freestyle For Sailors,Gold
4,1896,Athens,Aquatics,Swimming,"CHASAPIS, Spiridon",GRE,Men,100M Freestyle For Sailors,Silver
...,...,...,...,...,...,...,...,...,...
31160,2012,London,Wrestling,Wrestling Freestyle,"JANIKOWSKI, Damian",POL,Men,Wg 84 KG,Bronze
31161,2012,London,Wrestling,Wrestling Freestyle,"REZAEI, Ghasem Gholamreza",IRI,Men,Wg 96 KG,Gold
31162,2012,London,Wrestling,Wrestling Freestyle,"TOTROV, Rustam",RUS,Men,Wg 96 KG,Silver
31163,2012,London,Wrestling,Wrestling Freestyle,"ALEKSANYAN, Artur",ARM,Men,Wg 96 KG,Bronze


In [54]:
# Print column names, non-null counts, dtypes and memory usage;
# a non-null count below the number of entries reveals missing values
summer.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 31165 entries, 0 to 31164
Data columns (total 9 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   Year        31165 non-null  int64 
 1   City        31165 non-null  object
 2   Sport       31165 non-null  object
 3   Discipline  31165 non-null  object
 4   Athlete     31165 non-null  object
 5   Country     31161 non-null  object
 6   Gender      31165 non-null  object
 7   Event       31165 non-null  object
 8   Medal       31165 non-null  object
dtypes: int64(1), object(8)
memory usage: 2.1+ MB


In [57]:
# Create a pandas Series named athlete from the 'Athlete' column
athlete = summer['Athlete']

In [58]:
# Preview the first five entries
athlete.head()

0         HAJOS, Alfred
1      HERSCHMANN, Otto
2     DRIVAS, Dimitrios
3    MALOKINIS, Ioannis
4    CHASAPIS, Spiridon
Name: Athlete, dtype: object

In [60]:
# Check the Python type of the athlete variable (a pandas Series)
type(athlete)

pandas.core.series.Series

In [63]:
# Shape of the Series: a one-element tuple holding the number of entries
athlete.shape

(31165,)

In [64]:
# For object (string) data, describe() reports count, number of unique values,
# the most frequent value (top) and how often it occurs (freq)
athlete.describe()

count               31165
unique              22762
top       PHELPS, Michael
freq                   22
Name: Athlete, dtype: object

In [65]:
# Number of non-missing entries in the Series
athlete.count()

31165

In [66]:
# For strings, min() returns the smallest value in lexicographic order,
# i.e. the name that sorts first alphabetically
athlete.min()

'AABYE, Edgar'

In [67]:
# For strings, max() returns the largest value in lexicographic order (the name that sorts last).
# Comparison is by Unicode code point, so a name starting with a non-ASCII letter
# such as 'Ö' sorts after every name starting with A-Z.
athlete.max()

'ÖSTRAND, Per-Olof'

In [68]:
# Array of distinct athlete names, in order of first appearance
athlete.unique()

array(['HAJOS, Alfred', 'HERSCHMANN, Otto', 'DRIVAS, Dimitrios', ...,
       'TOTROV, Rustam', 'ALEKSANYAN, Artur', 'LIDBERG, Jimmy'],
      dtype=object)

In [69]:
# Number of distinct athlete names
athlete.nunique()

22762

In [70]:
# Number of rows in which each athlete name appears, most frequent first
athlete.value_counts()

PHELPS, Michael          22
LATYNINA, Larisa         18
ANDRIANOV, Nikolay       15
ONO, Takashi             13
MANGIAROTTI, Edoardo     13
                         ..
ZAKA, Uddin               1
ZAFAR, Hayat              1
MUHAMMAD, Rashid          1
MANNA, Muhammad Afzal     1
LIDBERG, Jimmy            1
Name: Athlete, Length: 22762, dtype: int64