# Usage of `astype()` in pandas

In [1]:
import pandas as pd
import numpy as np

# Build a small two-column DataFrame whose values are numeric *strings*,
# so the astype() examples that follow have something to convert.
technologies = dict(
    Fee=["20000", "25000", "26000"],
    Discount=["1000", "2300", "1500"],
)
data = pd.DataFrame(data=technologies)
print(data)

     Fee Discount
0  20000     1000
1  25000     2300
2  26000     1500


In [2]:
# The columns were built from Python strings, so both are reported as object.
print(data.dtypes)

Fee         object
Discount    object
dtype: object


In [5]:
# Cast all columns to int.
# Other ways to spell the target dtype:
#   data = data.astype('int64')
#   data = data.astype('int')
# astype() returns a new DataFrame, so the result is assigned back to `data`.
data = data.astype(np.int64)
print(data.dtypes)

Fee         int64
Discount    int64
dtype: object


In [6]:
# The dtype can also be given as a string alias; 'int' resolved to int64 in this run.
data = data.astype('int')
print(data.dtypes)

Fee         int64
Discount    int64
dtype: object


In [7]:
# Cast all columns to pandas' dedicated string dtype
# (displayed as string[python] in this run, not as object).
data = data.astype('string')
print(data.dtypes)

Fee         string[python]
Discount    string[python]
dtype: object


In [8]:
# Cast all columns to float; the 'float' alias resolved to float64 in this run.
data = data.astype('float')
print(data.dtypes)

Fee         float64
Discount    float64
dtype: object


In [10]:
# Same cast as with the 'float' alias, this time passing the NumPy type np.float64.
data = data.astype(np.float64)
print(data.dtypes)

Fee         float64
Discount    float64
dtype: object


In [11]:
# The values now display with a decimal point because both columns are float64.
print(data)

       Fee  Discount
0  20000.0    1000.0
1  25000.0    2300.0
2  26000.0    1500.0


In [19]:
# astype() - cast multiple columns using a dict

import pandas as pd
import numpy as np

# Sample course table: every value, including the numeric-looking ones,
# is supplied as a string.
technologies = dict(
    Courses=["Spark", "PySpark", "Hadoop"],
    Fee=["20000", "25000", "26000"],
    Duration=['30day', '40days', '35days'],
    Discount=["1000", "2300", "1500"],
)

df = pd.DataFrame(data=technologies)
# Show the frame, a separator line, then the per-column dtypes.
print(df, "----------------------------------", df.dtypes, sep="\n")

   Courses    Fee Duration Discount
0    Spark  20000    30day     1000
1  PySpark  25000   40days     2300
2   Hadoop  26000   35days     1500
----------------------------------
Courses     object
Fee         object
Duration    object
Discount    object
dtype: object


In [20]:
# Pass a {column: dtype} mapping to cast only the listed columns;
# 'Duration' is not listed and therefore stays object.
df = df.astype({'Courses':'string', 'Fee':'int', 'Discount':'float'})
print(df.dtypes)

Courses     string[python]
Fee                  int64
Duration            object
Discount           float64
dtype: object


In [21]:
# Cast the single 'Duration' column to the string dtype.
# NOTE(review): this rebinds `df` from the DataFrame to a Series, so the
# original frame is no longer reachable through `df` and this cell cannot be
# re-run as-is. Consider a separate name for the Series.
df = df["Duration"].astype('string')

In [22]:
# `df` now refers to the 'Duration' Series rather than the full DataFrame.
print(df)

0     30day
1    40days
2    35days
Name: Duration, dtype: string


In [23]:
# On a Series, .dtypes gives the single dtype of its values.
print(df.dtypes)

string


In [5]:
import pandas as pd
import numpy as np

# Rebuild the sample course table (all values are strings) under the name
# `data` for the error-handling examples.
technologies = dict(
    Courses=["Spark", "PySpark", "Hadoop"],
    Fee=["20000", "25000", "26000"],
    Duration=['30day', '40days', '35days'],
    Discount=["1000", "2300", "1500"],
)
data = pd.DataFrame(data=technologies)
# Show the frame, a separator line, then the per-column dtypes.
print(data, '------------------------', data.dtypes, sep="\n")

   Courses    Fee Duration Discount
0    Spark  20000    30day     1000
1  PySpark  25000   40days     2300
2   Hadoop  26000   35days     1500
------------------------
Courses     object
Fee         object
Duration    object
Discount    object
dtype: object


In [8]:
# Cast a subset of columns and keep the result in a new frame `d1`;
# astype() returns a new object, so `data` keeps its original dtypes.
d1 = data.astype({'Courses':'string', 'Fee':'int', 'Discount':'float'})
print(d1.dtypes)

Courses     string[python]
Fee                  int64
Duration            object
Discount           float64
dtype: object


In [10]:
# Raise error when unable to cast
# 'Courses' holds non-numeric text such as 'Spark', so casting it to int fails
# with a ValueError. The exception is the point of this cell, so it is caught
# and displayed instead of being left to abort a Restart & Run All.
try:
    d1['Courses'] = d1['Courses'].astype('int')
except ValueError as err:
    # Nothing was assigned: the column keeps its previous dtype.
    print(f"ValueError: {err}")

ValueError: invalid literal for int() with base 10: 'Spark'

In [11]:
# Ignore error when unable to cast
# With errors='ignore' a failed cast hands back the original column unchanged,
# so 'Courses' keeps its string dtype in the listing below.
d1['Courses'] = d1['Courses'].astype('int', errors='ignore')
print(d1.dtypes)

Courses     string[python]
Fee                  int64
Duration            object
Discount           float64
dtype: object


In [17]:
# Load the sample CSV used by the remaining examples.
# The directory defaults to the author's original location but can be
# overridden through the ASTYPE_DEMO_DATA_DIR environment variable, so the
# notebook can run on another machine without editing this cell.
import os
from pathlib import Path

DATA_DIR = Path(os.environ.get("ASTYPE_DEMO_DATA_DIR", r"C:\Users\SHREE\Desktop\Z-data_samples"))

# NOTE: this rebinds `data`, replacing the DataFrame built in an earlier cell.
data = pd.read_csv(DATA_DIR / "data.csv")
print(data)
print('-----------------------')
print(data.dtypes)

       Name  Rank  Marks
0    Rossum     1     95
1    Travis     3     80
2  MCKinney     2     90
3   JHunter     4     89
4    Dennis     5     79
-----------------------
Name     object
Rank      int64
Marks     int64
dtype: object


In [20]:
# The text column read from the CSV is stored as object.
print(data['Name'].dtypes)

object


In [22]:
# 'Name' holds non-numeric text, so the int cast cannot succeed; with
# errors='ignore' the original Series comes back unchanged (still object)
# instead of an exception being raised.
# NOTE(review): `d1` referred to a DataFrame in earlier cells and is rebound
# to a Series here.
d1 = data['Name'].astype('int', errors='ignore')
print(d1)

0      Rossum
1      Travis
2    MCKinney
3     JHunter
4      Dennis
Name: Name, dtype: object


In [26]:
# The converted Series is only displayed, not assigned, so data['Rank'] itself
# is left as it was.
data['Rank'].astype("float")

0    1.0
1    3.0
2    2.0
3    4.0
4    5.0
Name: Rank, dtype: float64

In [27]:
# Still int64: astype() returns a new Series and does not modify `data` in place.
print(data['Rank'].dtypes)

int64


In [36]:
# Cast the integer 'Marks' column to the string dtype, keeping the result in `d2`.
d2 = data['Marks'].astype('string')
print(d2)

0    95
1    80
2    90
3    89
4    79
Name: Marks, dtype: string


In [53]:
# The source column is still int64; only `d2` holds the string version.
data['Marks'].dtypes

dtype('int64')