##### Data Types and Column Operations

In [1]:
import pandas as pd
import numpy as np

##### Data Type Handling
Working with different data types in pandas

In [2]:
# Create DataFrame with multiple types
dft = pd.DataFrame({
    'A': np.random.rand(3),
    'B': 1,
    'C': 'foo',
    'D': pd.Timestamp('20010102'),
    'E': pd.Series([1.0] * 3).astype('float32'),
    'F': False,
    'G': pd.Series([1] * 3, dtype='int8')
})

print("DataFrame with multiple types:")
print(dft)
print("\nData types of each column:")
print(dft.dtypes)

DataFrame with multiple types:
          A  B    C          D    E      F  G
0  0.073607  1  foo 2001-01-02  1.0  False  1
1  0.046409  1  foo 2001-01-02  1.0  False  1
2  0.941641  1  foo 2001-01-02  1.0  False  1

Data types of each column:
A          float64
B            int64
C           object
D    datetime64[s]
E          float32
F             bool
G             int8
dtype: object


In [3]:
# Type coercion examples
print("Integers coerced to floats:")
print(pd.Series([1, 2, 3, 4, 5, 6.]))

print("\nMixed types coerced to object:")
print(pd.Series([1, 2, 3, 6., 'foo']))

Integers coerced to floats:
0    1.0
1    2.0
2    3.0
3    4.0
4    5.0
5    6.0
dtype: float64

Mixed types coerced to object:
0      1
1      2
2      3
3    6.0
4    foo
dtype: object


In [4]:
# Count of each dtype
print("Count of each dtype:")
print(dft.dtypes.value_counts())

Count of each dtype:
float64          1
int64            1
object           1
datetime64[s]    1
float32          1
bool             1
int8             1
Name: count, dtype: int64


##### Numeric Type Operations
Working with different numeric types

In [5]:
# Create DataFrames with different numeric types
df1 = pd.DataFrame(np.random.randn(8, 1), columns=['A'], dtype='float32')
df2 = pd.DataFrame({
    'A': pd.Series(np.random.randn(8), dtype='float16'),
    'B': pd.Series(np.random.randn(8)),
    'C': pd.Series(np.array(np.random.randn(8), dtype='uint8'))
})

print("DataFrame 1 (float32):")
print(df1)
print("\nDataFrame 2 (mixed types):")
print(df2)

DataFrame 1 (float32):
          A
0 -1.023313
1  1.029768
2  0.939275
3 -1.034283
4  0.462525
5  0.255806
6  0.064092
7 -0.101829

DataFrame 2 (mixed types):
          A         B    C
0 -0.036194 -0.049705    0
1 -1.514648  0.472076  255
2 -0.057983 -1.115859    1
3 -1.268555 -0.219221    0
4 -0.221802 -0.658629    0
5 -0.327393  1.265951    1
6 -1.211914 -0.389601    0
7  0.654785 -0.207451    0


  has_large_values = (abs_vals > 1e6).any()


In [6]:
# Upcasting example
df3 = df1.reindex_like(df2).fillna(value=0.0) + df2
print("Result of operation (note dtype changes):")
print(df3)
print("\nResulting dtypes:")
print(df3.dtypes)

Result of operation (note dtype changes):
          A         B      C
0 -1.059507 -0.049705    0.0
1 -0.484880  0.472076  255.0
2  0.881291 -1.115859    1.0
3 -2.302837 -0.219221    0.0
4  0.240724 -0.658629    0.0
5 -0.071586  1.265951    1.0
6 -1.147822 -0.389601    0.0
7  0.552956 -0.207451    0.0

Resulting dtypes:
A    float32
B    float64
C    float64
dtype: object


##### Column Operations
Adding and manipulating columns

In [7]:
# Create sample DataFrame
df = pd.DataFrame({
    'one': pd.Series([1., 2., 3., np.nan]),
    'flag': pd.Series([False, False, True, False]),
    'foo': 'bar'
}, index=['a', 'b', 'c', 'd'])

print("Original DataFrame:")
print(df)

Original DataFrame:
   one flag  foo
a  NaN  NaN  bar
b  NaN  NaN  bar
c  NaN  NaN  bar
d  NaN  NaN  bar


In [8]:
# Add truncated column
df['one_trunc'] = df['one'][:2]
print("After adding truncated column:")
print(df)

After adding truncated column:
   one flag  foo  one_trunc
a  NaN  NaN  bar        NaN
b  NaN  NaN  bar        NaN
c  NaN  NaN  bar        NaN
d  NaN  NaN  bar        NaN


In [9]:
# Insert column at specific location
df.insert(1, 'bar', df['one'])
print("After inserting column:")
print(df)

After inserting column:
   one  bar flag  foo  one_trunc
a  NaN  NaN  NaN  bar        NaN
b  NaN  NaN  NaN  bar        NaN
c  NaN  NaN  NaN  bar        NaN
d  NaN  NaN  NaN  bar        NaN


##### Column Assignment with assign()
Creating new columns in method chains

In [17]:
# Load iris dataset
# Option 1: Try with latin-1 encoding (most permissive)
iris = pd.read_csv('data/iris.data', 
                  names=['SepalLength', 'SepalWidth', 'PetalLength', 'PetalWidth', 'Name'])
print("Iris dataset:")
print(iris.head())

Iris dataset:
    SepalLength   SepalWidth   PetalLength   PetalWidth     Name
0  sepal.length  sepal.width  petal.length  petal.width  variety
1           5.1          3.5           1.4           .2   Setosa
2           4.9            3           1.4           .2   Setosa
3           4.7          3.2           1.3           .2   Setosa
4           4.6          3.1           1.5           .2   Setosa


In [20]:
# convert to int
iris[['SepalLength', 'SepalWidth', 'PetalLength', 'PetalWidth']] = iris[['SepalLength', 'SepalWidth', 'PetalLength', 'PetalWidth']].apply(pd.to_numeric, errors='coerce')

In [21]:
# Add new column using assign
print("With sepal ratio:")
print(iris.assign(
    sepal_ratio=lambda x: x['SepalWidth'] / x['SepalLength']
).head())

With sepal ratio:
   SepalLength  SepalWidth  PetalLength  PetalWidth     Name  sepal_ratio
0          NaN         NaN          NaN         NaN  variety          NaN
1          5.1         3.5          1.4         0.2   Setosa     0.686275
2          4.9         3.0          1.4         0.2   Setosa     0.612245
3          4.7         3.2          1.3         0.2   Setosa     0.680851
4          4.6         3.1          1.5         0.2   Setosa     0.673913


In [22]:
# Multiple assignments in chain
result = (iris.query('SepalLength > 5')
          .assign(
              SepalRatio=lambda x: x.SepalWidth / x.SepalLength,
              PetalRatio=lambda x: x.PetalWidth / x.PetalLength
          ))

print("Filtered data with ratios:")
print(result.head())

Filtered data with ratios:
    SepalLength  SepalWidth  PetalLength  PetalWidth    Name  SepalRatio  \
1           5.1         3.5          1.4         0.2  Setosa    0.686275   
6           5.4         3.9          1.7         0.4  Setosa    0.722222   
11          5.4         3.7          1.5         0.2  Setosa    0.685185   
15          5.8         4.0          1.2         0.2  Setosa    0.689655   
16          5.7         4.4          1.5         0.4  Setosa    0.771930   

    PetalRatio  
1     0.142857  
6     0.235294  
11    0.133333  
15    0.166667  
16    0.266667  
