## Dataframe and Series

### Series

In [36]:
import numpy as np
import pandas as pd

# Series based on array
series_from_list = pd.Series(data=[1, 2, 3, 4, 5.3],
                             index=['row_1', 'row_2', 'row_3', 'row_4', 'row_5'],
                             dtype='string',
                             name='pd_Series'
                             )

print(series_from_list)


row_1      1
row_2      2
row_3      3
row_4      4
row_5    5.3
Name: pd_Series, dtype: string


In [13]:
dict_array = {
    'row_1': 2,
    'row_2': 3,
    'row_3': 4,
    'row_4': 5,
    'row_5': 6
}

# Series based on dict
series_from_dict = pd.Series(dict_array,
                             index=['row_3', 'row_4', 'row_100'],
                             dtype=int,
                             name='pd_Series'
                             )
print(series_from_dict)


row_3      4.0
row_4      5.0
row_100    NaN
Name: pd_Series, dtype: float64


In [None]:
# Copy Series
np_array = np.arange(1, 21)
series_from_np_array = pd.Series(np_array,
                                 copy=False
                                 )

series_from_np_array.iloc[3] = 100

print("Original: ", np_array)

np_array_2 = np.arange(1, 16)
series_from_np_array_2 = pd.Series(np_array_2,
                                   copy=True)

series_from_np_array_2.iloc[3] = 200

print("Copy", np_array_2)



Original:  [  1   2   3 100   5   6   7   8   9  10  11  12  13  14  15  16  17  18
  19  20]
Copy [ 1  2  3  4  5  6  7  8  9 10 11 12 13 14 15]


In [19]:
# Series based on other series
series_list = pd.Series(data=[1, 2, 3, 4, 5],
                        index=['row_1', 'row_2', 'row_3', 'row_4', 'row_5'],
                        dtype='string',
                        name='pd_Series_1'
                        )

series_from_series = pd.Series(data=series_list,
                               index=['row_2', 'row_5', 'row_1'])

print(series_from_series)



row_2    2
row_5    5
row_1    1
Name: pd_Series_1, dtype: string


### DataFrame

In [25]:
dict_array = {
    'col1': [1, 2, 3, 4],
    'col2': [5, 6, 7, 8],
    'col3': [9, 10, 11, 12],
    'col4': [13, 14, 15, 16]
}
# DataFrame based on dict
dateframe_from_dict = pd.DataFrame(
    data=dict_array,
    index=['row_1', 'row_2', 'row_3', 'row_4'],
    columns=['col3', 'col4', 'col1'],
    dtype=float
)

print(dateframe_from_dict)

       col3  col4  col1
row_1   9.0  13.0   1.0
row_2  10.0  14.0   2.0
row_3  11.0  15.0   3.0
row_4  12.0  16.0   4.0


In [28]:
# DataFrame based on numpy array WITH COPY PARAM
np_array = [
    [1, 2, 3, 4, 5],
    [6, 7, 8, 9, 10],
    [11, 12, 13, 14, 15],
    [16, 17, 18, 19, 20]
]

# DateFrame based on numpy array
df_1 = pd.DataFrame(data=np_array,
                    copy=True)
df_1.iloc[:, 2] = 100

print(df_1)
print(np_array)

    0   1    2   3   4
0   1   2  100   4   5
1   6   7  100   9  10
2  11  12  100  14  15
3  16  17  100  19  20
[[1, 2, 3, 4, 5], [6, 7, 8, 9, 10], [11, 12, 13, 14, 15], [16, 17, 18, 19, 20]]


In [30]:
dict_array = {
    'row_1': 2,
    'row_2': 3,
    'row_3': 4,
    'row_4': 5,
    'row_5': 6
}

array = np.array([1, 2, 3, 4, 5])

series_1 = pd.Series(dict_array)
series_2 = pd.Series(array, index=['row_3', 'row_4', 'row_5', 'row_6', 'row_9'])

df = pd.DataFrame(data={'col1': series_1, 'col2': series_2},
                  index=['row_1', 'row_7'])

print(df)

       col1  col2
row_1   2.0   NaN
row_7   NaN   NaN


## Read and Write to Files

In [None]:
# With default sep ,
df_1 = pd.read_csv('content/avito_data.csv')
print(df_1)


        user_id            city               category_name    price  \
0  dbe73ad6e4b5       Волгоград      Детская одежда и обувь      NaN   
1  2e11806abe57     Нижняя Тура                  Велосипеды   3000.0   
2  0b850bbebb10          Бердск               Аудио и видео  15000.0   
3  5f1d5c3ce0da         Саратов             Бытовая техника   4500.0   
4  23e2d97bfc7f         Бузулук  Товары для детей и игрушки   4900.0   
5  c2a632af2602  Ростов-на-Дону      Ремонт и строительство    500.0   
6  b239811ad530        Оренбург                    Ноутбуки  20990.0   
7  d85fa02e6341     Калининград                    Телефоны    990.0   
8  ae6586719bec     Новосибирск       Товары для компьютера   1200.0   
9  30ad26d633ef       Полесской      Детская одежда и обувь    400.0   

  activation_date user_type  year  month  day  
0      2017-04-18   Private  2017      4   18  
1      2017-04-16   Private  2017      4   16  
2      2017-04-17   Private  2017      4   17  
3      2017-04-

In [35]:
# With sep ;
df_2 = pd.read_csv('content/avito_sep.csv',
                   sep=';')
print(df_2)

        user_id            city               category_name    price  \
0  dbe73ad6e4b5       Волгоград      Детская одежда и обувь      NaN   
1  2e11806abe57     Нижняя Тура                  Велосипеды   3000.0   
2  0b850bbebb10          Бердск               Аудио и видео  15000.0   
3   5f1d53ce0da         Саратов             Бытовая техника   4500.0   
4  23e2d97bfc7f         Бузулук  Товары для детей и игрушки   4900.0   
5  c2a632af2602  Ростов-на-Дону      Ремонт и строительство    500.0   
6  b239811ad530        Оренбург                    Ноутбуки  20990.0   
7  d85fa02e6341     Калининград                    Телефоны    990.0   
8  ae6586719bec     Новосибирск       Товары для компьютера   1200.0   
9  30ad26d633ef       Полевской      Детская одежда и обувь    400.0   

  activation_date user_type  year  month  day  
0      2017-04-18   Private  2017      4   18  
1      2017-04-16   Private  2017      4   16  
2      2017-04-17   Private  2017      4   17  
3      2017-04-

In [41]:
import pandas as pd

df_index_col = pd.read_csv('content/avito_sep.csv',
                           sep=';',
                           index_col=['user_type', 'city']
                           )

print(df_index_col)

                               user_id               category_name    price  \
user_type city                                                                
Private   Волгоград       dbe73ad6e4b5      Детская одежда и обувь      NaN   
          Нижняя Тура     2e11806abe57                  Велосипеды   3000.0   
          Бердск          0b850bbebb10               Аудио и видео  15000.0   
          Саратов          5f1d53ce0da             Бытовая техника   4500.0   
          Бузулук         23e2d97bfc7f  Товары для детей и игрушки   4900.0   
          Ростов-на-Дону  c2a632af2602      Ремонт и строительство    500.0   
Shop      Оренбург        b239811ad530                    Ноутбуки  20990.0   
          Калининград     d85fa02e6341                    Телефоны    990.0   
Company   Новосибирск     ae6586719bec       Товары для компьютера   1200.0   
Private   Полевской       30ad26d633ef      Детская одежда и обувь    400.0   

                         activation_date  year  mon

In [43]:
df_usecols = pd.read_csv('content/avito_sep.csv',
                         sep=';',
                         index_col=['city'],
                         usecols=['user_id', 'city', 'price', 'activation_date']
                         )

print(df_usecols)

                     user_id    price activation_date
city                                                 
Волгоград       dbe73ad6e4b5      NaN      2017-04-18
Нижняя Тура     2e11806abe57   3000.0      2017-04-16
Бердск          0b850bbebb10  15000.0      2017-04-17
Саратов          5f1d53ce0da   4500.0      2017-04-17
Бузулук         23e2d97bfc7f   4900.0      2017-04-15
Ростов-на-Дону  c2a632af2602    500.0      2017-04-12
Оренбург        b239811ad530  20990.0      2017-04-17
Калининград     d85fa02e6341    990.0      2017-04-18
Новосибирск     ae6586719bec   1200.0      2017-04-18
Полевской       30ad26d633ef    400.0      2017-04-12


In [46]:
# test argument Squeeze - return Series if only one column is parsed
df_squeeze = pd.read_csv('content/avito_sep.csv',
                         sep=';',
                         usecols=['city'],
                         )

df_squeeze_city = df_squeeze['city']
print(df_squeeze)

             city
0       Волгоград
1     Нижняя Тура
2          Бердск
3         Саратов
4         Бузулук
5  Ростов-на-Дону
6        Оренбург
7     Калининград
8     Новосибирск
9       Полевской


In [49]:
df = pd.read_csv('content/avito_sep.csv',
                 sep=';',
                 usecols=['user_id', 'city', 'price', 'activation_date'],
                 )

df.info()

df1 = pd.read_csv('content/avito_sep.csv',
                  sep=';',
                  usecols=['user_id', 'city', 'price', 'activation_date'],
                  dtype={'city': 'category', 'user_id': 'string'}
                  )

df1.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10 entries, 0 to 9
Data columns (total 4 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   user_id          10 non-null     object 
 1   city             10 non-null     object 
 2   price            9 non-null      float64
 3   activation_date  10 non-null     object 
dtypes: float64(1), object(3)
memory usage: 452.0+ bytes
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10 entries, 0 to 9
Data columns (total 4 columns):
 #   Column           Non-Null Count  Dtype   
---  ------           --------------  -----   
 0   user_id          10 non-null     string  
 1   city             10 non-null     category
 2   price            9 non-null      float64 
 3   activation_date  10 non-null     object  
dtypes: category(1), float64(1), object(1), string(1)
memory usage: 762.0+ bytes


In [51]:
# Argument n_rows
df_nrows = pd.read_csv('content/avito_sep.csv',
                       sep=';',
                       nrows=5)
print(df_nrows)

        user_id         city               category_name    price  \
0  dbe73ad6e4b5    Волгоград      Детская одежда и обувь      NaN   
1  2e11806abe57  Нижняя Тура                  Велосипеды   3000.0   
2  0b850bbebb10       Бердск               Аудио и видео  15000.0   
3   5f1d53ce0da      Саратов             Бытовая техника   4500.0   
4  23e2d97bfc7f      Бузулук  Товары для детей и игрушки   4900.0   

  activation_date user_type  year  month  day  
0      2017-04-18   Private  2017      4   18  
1      2017-04-16   Private  2017      4   16  
2      2017-04-17   Private  2017      4   17  
3      2017-04-17   Private  2017      4   17  
4      2017-04-15   Private  2017      4   15  


In [None]:
# Arguments parse_dates=...
df_parse_dates = pd.read_csv('content/avito_sep.csv',
                             sep=';',
                             parse_dates=['activation_date', 'price', 'category_name'])

df_parse_dates.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10 entries, 0 to 9
Data columns (total 9 columns):
 #   Column           Non-Null Count  Dtype         
---  ------           --------------  -----         
 0   user_id          10 non-null     object        
 1   city             10 non-null     object        
 2   category_name    10 non-null     object        
 3   price            9 non-null      object        
 4   activation_date  10 non-null     datetime64[ns]
 5   user_type        10 non-null     object        
 6   year             10 non-null     int64         
 7   month            10 non-null     int64         
 8   day              10 non-null     int64         
dtypes: datetime64[ns](1), int64(3), object(5)
memory usage: 852.0+ bytes


  df_parse_dates = pd.read_csv('content/avito_sep.csv',
  df_parse_dates = pd.read_csv('content/avito_sep.csv',


In [62]:
df_parse_dates = pd.read_csv('content/avito_sep.csv',
                             sep=';',
                            parse_dates=[['year', 'month', 'day'], 'activation_date', 'category_name']
)

print(df_parse_dates.info())
print(df_parse_dates)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10 entries, 0 to 9
Data columns (total 7 columns):
 #   Column           Non-Null Count  Dtype         
---  ------           --------------  -----         
 0   year_month_day   10 non-null     datetime64[ns]
 1   user_id          10 non-null     object        
 2   city             10 non-null     object        
 3   category_name    10 non-null     object        
 4   price            9 non-null      float64       
 5   activation_date  10 non-null     datetime64[ns]
 6   user_type        10 non-null     object        
dtypes: datetime64[ns](2), float64(1), object(4)
memory usage: 692.0+ bytes
None
  year_month_day       user_id            city               category_name  \
0     2017-04-18  dbe73ad6e4b5       Волгоград      Детская одежда и обувь   
1     2017-04-16  2e11806abe57     Нижняя Тура                  Велосипеды   
2     2017-04-17  0b850bbebb10          Бердск               Аудио и видео   
3     2017-04-17   5f1d53ce0d

  df_parse_dates = pd.read_csv('content/avito_sep.csv',
  df_parse_dates = pd.read_csv('content/avito_sep.csv',


In [66]:
df_parse_dates1 = pd.read_csv('content/avito_sep.csv',
                             sep=';',
                            parse_dates={
                                'data0': ['activation_date'],
                                'data1': ['year', 'month', 'day']
                                }
)

print(df_parse_dates.info())
print(df_parse_dates1)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10 entries, 0 to 9
Data columns (total 7 columns):
 #   Column           Non-Null Count  Dtype         
---  ------           --------------  -----         
 0   year_month_day   10 non-null     datetime64[ns]
 1   user_id          10 non-null     object        
 2   city             10 non-null     object        
 3   category_name    10 non-null     object        
 4   price            9 non-null      float64       
 5   activation_date  10 non-null     datetime64[ns]
 6   user_type        10 non-null     object        
dtypes: datetime64[ns](2), float64(1), object(4)
memory usage: 692.0+ bytes
None
       data0      data1       user_id            city  \
0 2017-04-18 2017-04-18  dbe73ad6e4b5       Волгоград   
1 2017-04-16 2017-04-16  2e11806abe57     Нижняя Тура   
2 2017-04-17 2017-04-17  0b850bbebb10          Бердск   
3 2017-04-17 2017-04-17   5f1d53ce0da         Саратов   
4 2017-04-15 2017-04-15  23e2d97bfc7f         Бузулук   

  df_parse_dates1 = pd.read_csv('content/avito_sep.csv',


In [77]:
# Argument keep_date_col and encoding
df_parse_dates1 = pd.read_csv('content/avito_sep.csv',
                             sep=';',
                             keep_date_col=True,
                            parse_dates={
                                'data1': ['year', 'month', 'day'],
                                },
                            encoding='utf8'
)

print(df_parse_dates1)

       data1       user_id            city               category_name  \
0 2017-04-18  dbe73ad6e4b5       Волгоград      Детская одежда и обувь   
1 2017-04-16  2e11806abe57     Нижняя Тура                  Велосипеды   
2 2017-04-17  0b850bbebb10          Бердск               Аудио и видео   
3 2017-04-17   5f1d53ce0da         Саратов             Бытовая техника   
4 2017-04-15  23e2d97bfc7f         Бузулук  Товары для детей и игрушки   
5 2017-04-12  c2a632af2602  Ростов-на-Дону      Ремонт и строительство   
6 2017-04-17  b239811ad530        Оренбург                    Ноутбуки   
7 2017-04-18  d85fa02e6341     Калининград                    Телефоны   
8 2017-04-18  ae6586719bec     Новосибирск       Товары для компьютера   
9 2017-04-12  30ad26d633ef       Полевской      Детская одежда и обувь   

     price activation_date user_type  year month day  
0      NaN      2017-04-18   Private  2017     4  18  
1   3000.0      2017-04-16   Private  2017     4  16  
2  15000.0      2017

  df_parse_dates1 = pd.read_csv('content/avito_sep.csv',
  df_parse_dates1 = pd.read_csv('content/avito_sep.csv',


### Write to CSV

In [83]:
df = pd.read_csv('content/avito_data.csv')

# Save file
df.to_csv('content/avito_copy.csv', sep=';')

# Save columns
df.to_csv('content/avito_copy.csv',
          sep=';',
          columns=['city', 'price']
          )

# Argument header
df.to_csv('content/avito_copy.csv',
          sep=';',
          columns=['city', 'price'],
          header=['city_copy', 'price_copy']
          )

#Argument index
df.to_csv('content/avito_copy.csv',
          sep=';',
          index=True
          )

## Read and Write SQL

In [89]:
import sqlite3 as sq

def create_table(db='avito_data.db', path='content/avito_data.csv', name_table='avito'):
    con = sq.connect(db)
    
    df = pd.read_csv(path)
    df.to_sql(name_table, con, if_exists='replace', index=False)
    con.close()
    
create_table()

sql_request = '''SELECT * FROM avito'''

df = pd.read_csv('content/avito_data.csv')

with sq.connect('avito_data.db') as con:
    df_sql = pd.read_sql(sql=sql_request,
                         con=con,
                         index_col=['user_id'],
                         parse_dates=['activation'],
                         )
    
print(df_sql)

                        city               category_name    price  \
user_id                                                             
dbe73ad6e4b5       Волгоград      Детская одежда и обувь      NaN   
2e11806abe57     Нижняя Тура                  Велосипеды   3000.0   
0b850bbebb10          Бердск               Аудио и видео  15000.0   
5f1d5c3ce0da         Саратов             Бытовая техника   4500.0   
23e2d97bfc7f         Бузулук  Товары для детей и игрушки   4900.0   
c2a632af2602  Ростов-на-Дону      Ремонт и строительство    500.0   
b239811ad530        Оренбург                    Ноутбуки  20990.0   
d85fa02e6341     Калининград                    Телефоны    990.0   
ae6586719bec     Новосибирск       Товары для компьютера   1200.0   
30ad26d633ef       Полесской      Детская одежда и обувь    400.0   

             activation_date user_type  year  month  day  
user_id                                                   
dbe73ad6e4b5      2017-04-18   Private  2017      4  

## Indexes


In [101]:
df = pd.DataFrame({
    'col_1': [1, 2, 3, 4, 5, 6],
    'col_2': [7, 19, 8, 9, 10, 11],
    'col_3': [12, 13, 14, 15, 16, 17],
    'col_4': [18, 19, 20, 21, 22, 23]
})

df.set_index(['col_1', 'col_2'], inplace=True, append=True)
df.reset_index('col_2', inplace=True, drop=True)
# print(df)
df['col_3'] = 1
df['NEW_COL'] = df['col_3'] + df['col_4']
# del df['col_1']
print(df['col_3'])
print(df[['col_3', 'col_4', 'NEW_COL']])

# print(df.col_1)




   col_1
0  1        1
1  2        1
2  3        1
3  4        1
4  5        1
5  6        1
Name: col_3, dtype: int64
         col_3  col_4  NEW_COL
  col_1                       
0 1          1     18       19
1 2          1     19       20
2 3          1     20       21
3 4          1     21       22
4 5          1     22       23
5 6          1     23       24


In [None]:
list_index = list('qwqituiz')

index_data_1 = pd.Index(list_index,
                      name='rows')

print(index_data_1)

Index(['q', 'w', 'q', 'i', 't', 'u', 'i', 'z'], dtype='object', name='rows')


In [None]:
columns_data = pd.Index(['col_1', 'col_2', 'col_3'],
                        name='cols'
                        )
df_data = pd.DataFrame(data={
    'col_1': [1, 2, 3, 4, 5, 6, 7, 8],
    'col_2': [9, 10, 11, 12, 13, 14, 15, 16],
    'col_3': [17, 18, 19, 20, 21, 22, 23, 24]
},
    index=index_data_1,
    columns=columns_data
    )

print(df_data)

cols  col_1  col_2  col_3
rows                     
q         1      9     17
w         2     10     18
q         3     11     19
i         4     12     20
t         5     13     21
u         6     14     22
i         7     15     23
z         8     16     24


In [108]:
df = pd.DataFrame({f'c_{i}': np.arange(1000)*i for i in range(100)})
print(df)

     c_0  c_1   c_2   c_3   c_4   c_5   c_6   c_7   c_8   c_9  ...   c_90  \
0      0    0     0     0     0     0     0     0     0     0  ...      0   
1      0    1     2     3     4     5     6     7     8     9  ...     90   
2      0    2     4     6     8    10    12    14    16    18  ...    180   
3      0    3     6     9    12    15    18    21    24    27  ...    270   
4      0    4     8    12    16    20    24    28    32    36  ...    360   
..   ...  ...   ...   ...   ...   ...   ...   ...   ...   ...  ...    ...   
995    0  995  1990  2985  3980  4975  5970  6965  7960  8955  ...  89550   
996    0  996  1992  2988  3984  4980  5976  6972  7968  8964  ...  89640   
997    0  997  1994  2991  3988  4985  5982  6979  7976  8973  ...  89730   
998    0  998  1996  2994  3992  4990  5988  6986  7984  8982  ...  89820   
999    0  999  1998  2997  3996  4995  5994  6993  7992  8991  ...  89910   

      c_91   c_92   c_93   c_94   c_95   c_96   c_97   c_98   c_99  
0     

In [118]:
# Get indexes and columns
index = df.index
columns = df.columns

index.to_list()
columns.to_numpy()


print(index.to_list(), columns.to_numpy, sep='\n\n')

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221,

In [None]:
list_index = list('qwqituiz')

index_data_1 = pd.Index(list_index,
                      name='rows')


df_data = pd.DataFrame(data={
    'col_1': [1, 2, 3, 4, 5, 6, 7, 8],
    'col_2': [9, 10, 11, 12, 13, 14, 15, 16],
    'col_3': [17, 18, 19, 20, 21, 22, 23, 24]
},
    index=index_data_1,
    columns=columns_data
    )


index_data = df_data.index

print(index_data.unique())
print(index_data.nunique())
print(index_data.is_unique)
print(index_data.duplicated())

Index(['q', 'w', 'i', 't', 'u', 'z'], dtype='object', name='rows')
6
False
[False False  True False False False  True False]
rows cols


In [132]:
print(index_data.name, columns_data.name)
index_data.name = 'rows_new'
columns_data.name = 'cols_new'

df_data_renamed = df_data.rename(columns={
    'col_1': 'col_11',
    'col_2': 'col_22',
    'col_3': 'col_33',
})


df_data.rename(str.upper,
                axis=0,
                inplace=True)

print(df_data_renamed)
print(df_data)

rows_new cols_new
cols_new  col_11  col_22  col_33
rows_new                        
q              1       9      17
w              2      10      18
q              3      11      19
i              4      12      20
t              5      13      21
u              6      14      22
i              7      15      23
z              8      16      24
cols_new  col_1  col_2  col_3
rows_new                     
Q             1      9     17
W             2     10     18
Q             3     11     19
I             4     12     20
T             5     13     21
U             6     14     22
I             7     15     23
Z             8     16     24


In [139]:
# NONE's in indexes

index = pd.Index([1, np.nan, 3, np.nan, 5])

df_nan = pd.DataFrame(data=np.arange(5), index=index)

print(df_nan)
print(df_nan.index.hasnans)
print(df_nan.index.isna())
print(df_nan.index.dropna())


     0
1.0  0
NaN  1
3.0  2
NaN  3
5.0  4
True
[False  True False  True False]
Index([1.0, 3.0, 5.0], dtype='float64')


# MultiIndex