# DataFrame Attributes

![image.png](attachment:39ac900a-4017-4dd2-8c58-1d114d1581ed.png)

In [6]:
import pandas as pd
olympic_data=pd.read_csv("https://raw.githubusercontent.com/svkarthik86/Advanced-python/main/olympics.csv",skiprows=4)
olympic_data.head(1)

Unnamed: 0,City,Edition,Sport,Discipline,Athlete,NOC,Gender,Event,Event_gender,Medal
0,Athens,1896,Aquatics,Swimming,"HAJOS, Alfred",HUN,Men,100m freestyle,M,Gold


## index
- The index (row labels) of the DataFrame.

In [11]:
olympic_data.index

RangeIndex(start=0, stop=29216, step=1)

## column
- The column labels of the DataFrame.

In [14]:
olympic_data.columns

Index(['City', 'Edition', 'Sport', 'Discipline', 'Athlete', 'NOC', 'Gender',
       'Event', 'Event_gender', 'Medal'],
      dtype='object')

## axes
- Return a list representing the axes of the DataFrame.
- It has the row axis labels and column axis labels as the only members. They are returned in that order.

In [15]:
olympic_data.axes

[RangeIndex(start=0, stop=29216, step=1),
 Index(['City', 'Edition', 'Sport', 'Discipline', 'Athlete', 'NOC', 'Gender',
        'Event', 'Event_gender', 'Medal'],
       dtype='object')]

## ndim
- Return an int representing the number of axes / array dimensions.
- Return 1 if Series. Otherwise return 2 if DataFrame.

In [16]:
olympic_data.ndim # DF always 2D

2

## shape
- Return a tuple representing the dimensionality of the DataFrame.

In [18]:
olympic_data.shape   #multiplication of shape# total number of Elements

(29216, 10)

## dtype
- Return the dtypes in the DataFrame.
- This returns a Series with the data type of each column. The result’s index is the original DataFrame’s columns. Columns with mixed types are stored with the object dtype. See the User Guide for more.

In [20]:
olympic_data.dtypes

City            object
Edition          int64
Sport           object
Discipline      object
Athlete         object
NOC             object
Gender          object
Event           object
Event_gender    object
Medal           object
dtype: object

## count
- Count non-NA cells for each column or row.

In [21]:
olympic_data.count

<bound method DataFrame.count of           City  Edition      Sport       Discipline               Athlete  \
0       Athens     1896   Aquatics         Swimming         HAJOS, Alfred   
1       Athens     1896   Aquatics         Swimming      HERSCHMANN, Otto   
2       Athens     1896   Aquatics         Swimming     DRIVAS, Dimitrios   
3       Athens     1896   Aquatics         Swimming    MALOKINIS, Ioannis   
4       Athens     1896   Aquatics         Swimming    CHASAPIS, Spiridon   
...        ...      ...        ...              ...                   ...   
29211  Beijing     2008  Wrestling  Wrestling Gre-R        ENGLICH, Mirko   
29212  Beijing     2008  Wrestling  Wrestling Gre-R  MIZGAITIS, Mindaugas   
29213  Beijing     2008  Wrestling  Wrestling Gre-R       PATRIKEEV, Yuri   
29214  Beijing     2008  Wrestling  Wrestling Gre-R         LOPEZ, Mijain   
29215  Beijing     2008  Wrestling  Wrestling Gre-R        BAROEV, Khasan   

       NOC Gender                       Ev

In [24]:
import numpy as np
df = pd.DataFrame({"Person":["John", "Myla", "Lewis", "John", "Myla"],
                   "Age": [24., np.nan, 21., 33, 26],
                   "Single": [False, True, True, True, False]})
df

Unnamed: 0,Person,Age,Single
0,John,24.0,False
1,Myla,,True
2,Lewis,21.0,True
3,John,33.0,True
4,Myla,26.0,False


In [23]:
df.count

<bound method DataFrame.count of   Person   Age  Single
0   John  24.0   False
1   Myla   NaN    True
2  Lewis  21.0    True
3   John  33.0    True
4   Myla  26.0   False>

In [26]:
df.Person.count

<bound method Series.count of 0     John
1     Myla
2    Lewis
3     John
4     Myla
Name: Person, dtype: object>

In [28]:
df.Single.count

<bound method Series.count of 0    False
1     True
2     True
3     True
4    False
Name: Single, dtype: bool>

## T

In [29]:
olympic_data.T # transpose of DataFrame # row to col

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,29206,29207,29208,29209,29210,29211,29212,29213,29214,29215
City,Athens,Athens,Athens,Athens,Athens,Athens,Athens,Athens,Athens,Athens,...,Beijing,Beijing,Beijing,Beijing,Beijing,Beijing,Beijing,Beijing,Beijing,Beijing
Edition,1896,1896,1896,1896,1896,1896,1896,1896,1896,1896,...,2008,2008,2008,2008,2008,2008,2008,2008,2008,2008
Sport,Aquatics,Aquatics,Aquatics,Aquatics,Aquatics,Aquatics,Aquatics,Aquatics,Aquatics,Aquatics,...,Wrestling,Wrestling,Wrestling,Wrestling,Wrestling,Wrestling,Wrestling,Wrestling,Wrestling,Wrestling
Discipline,Swimming,Swimming,Swimming,Swimming,Swimming,Swimming,Swimming,Swimming,Swimming,Swimming,...,Wrestling Gre-R,Wrestling Gre-R,Wrestling Gre-R,Wrestling Gre-R,Wrestling Gre-R,Wrestling Gre-R,Wrestling Gre-R,Wrestling Gre-R,Wrestling Gre-R,Wrestling Gre-R
Athlete,"HAJOS, Alfred","HERSCHMANN, Otto","DRIVAS, Dimitrios","MALOKINIS, Ioannis","CHASAPIS, Spiridon","CHOROPHAS, Efstathios","HAJOS, Alfred","ANDREOU, Joannis","CHOROPHAS, Efstathios","NEUMANN, Paul",...,"MINGUZZI, Andrea","FODOR, Zoltan","MAMBETOV, Asset","WHEELER, Adam","KHUSHTOV, Aslanbek","ENGLICH, Mirko","MIZGAITIS, Mindaugas","PATRIKEEV, Yuri","LOPEZ, Mijain","BAROEV, Khasan"
NOC,HUN,AUT,GRE,GRE,GRE,GRE,HUN,GRE,GRE,AUT,...,ITA,HUN,KAZ,USA,RUS,GER,LTU,ARM,CUB,RUS
Gender,Men,Men,Men,Men,Men,Men,Men,Men,Men,Men,...,Men,Men,Men,Men,Men,Men,Men,Men,Men,Men
Event,100m freestyle,100m freestyle,100m freestyle for sailors,100m freestyle for sailors,100m freestyle for sailors,1200m freestyle,1200m freestyle,1200m freestyle,400m freestyle,400m freestyle,...,74 - 84kg,74 - 84kg,84 - 96kg,84 - 96kg,84 - 96kg,84 - 96kg,96 - 120kg,96 - 120kg,96 - 120kg,96 - 120kg
Event_gender,M,M,M,M,M,M,M,M,M,M,...,M,M,M,M,M,M,M,M,M,M
Medal,Gold,Silver,Bronze,Gold,Silver,Bronze,Gold,Silver,Bronze,Gold,...,Gold,Silver,Bronze,Bronze,Gold,Silver,Bronze,Bronze,Gold,Silver


In [30]:
df.T

Unnamed: 0,0,1,2,3,4
Person,John,Myla,Lewis,John,Myla
Age,24.0,,21.0,33.0,26.0
Single,False,True,True,True,False


## empty 

In [32]:
olympic_data.empty # Df is empty 

False

In [33]:
df.empty

False

In [39]:
emp=pd.DataFrame([])

In [40]:
emp.empty

True

In [37]:
empty_df=pd.DataFrame([])
empty_df

In [42]:
olympic_data.values

array([['Athens', 1896, 'Aquatics', ..., '100m freestyle', 'M', 'Gold'],
       ['Athens', 1896, 'Aquatics', ..., '100m freestyle', 'M', 'Silver'],
       ['Athens', 1896, 'Aquatics', ..., '100m freestyle for sailors',
        'M', 'Bronze'],
       ...,
       ['Beijing', 2008, 'Wrestling', ..., '96 - 120kg', 'M', 'Bronze'],
       ['Beijing', 2008, 'Wrestling', ..., '96 - 120kg', 'M', 'Gold'],
       ['Beijing', 2008, 'Wrestling', ..., '96 - 120kg', 'M', 'Silver']],
      dtype=object)

# Data Validition

In [3]:
import pandas as pd
olympic_data=pd.read_csv("https://raw.githubusercontent.com/svkarthik86/Advanced-python/main/olympics.csv",skiprows=4)
olympic_data.head()

Unnamed: 0,City,Edition,Sport,Discipline,Athlete,NOC,Gender,Event,Event_gender,Medal
0,Athens,1896,Aquatics,Swimming,"HAJOS, Alfred",HUN,Men,100m freestyle,M,Gold
1,Athens,1896,Aquatics,Swimming,"HERSCHMANN, Otto",AUT,Men,100m freestyle,M,Silver
2,Athens,1896,Aquatics,Swimming,"DRIVAS, Dimitrios",GRE,Men,100m freestyle for sailors,M,Bronze
3,Athens,1896,Aquatics,Swimming,"MALOKINIS, Ioannis",GRE,Men,100m freestyle for sailors,M,Gold
4,Athens,1896,Aquatics,Swimming,"CHASAPIS, Spiridon",GRE,Men,100m freestyle for sailors,M,Silver


In [4]:
olympic_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 29216 entries, 0 to 29215
Data columns (total 10 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   City          29216 non-null  object
 1   Edition       29216 non-null  int64 
 2   Sport         29216 non-null  object
 3   Discipline    29216 non-null  object
 4   Athlete       29216 non-null  object
 5   NOC           29216 non-null  object
 6   Gender        29216 non-null  object
 7   Event         29216 non-null  object
 8   Event_gender  29216 non-null  object
 9   Medal         29216 non-null  object
dtypes: int64(1), object(9)
memory usage: 2.2+ MB


In [None]:
olympic_data.T # transpose of DataFrame # row to col