In [None]:
import pandas as pd
import numpy as np

# Reference table: built-in Python types next to common NumPy dtypes.

# Python built-in data types, each with a one-line description
python_data_types = {
    'int': 'Integer numbers',
    'float': 'Floating point numbers',
    'bool': 'Boolean (True or False)',
    'str': 'String (text)',
    'list': 'List (ordered collection of items)',
    'tuple': 'Tuple (immutable ordered collection of items)',
    # dicts preserve insertion order (guaranteed since Python 3.7),
    # so describing them as "unordered" would be wrong.
    'dict': 'Dictionary (insertion-ordered collection of key-value pairs)',
    'set': 'Set (unordered collection of unique items)'
}

# NumPy scalar types, each with a one-line description
numpy_data_types = {
    'int32': 'Integer (-2147483648 to 2147483647)',
    'int64': 'Integer (-9223372036854775808 to 9223372036854775807)',
    'float32': 'Single precision float',
    'float64': 'Double precision float',
    'bool_': 'Boolean (True or False)',
    'complex64': 'Complex number, represented by two 32-bit floats',
    'complex128': 'Complex number, represented by two 64-bit floats',
    'datetime64': 'Date time type',
    'timedelta64': 'Represents the difference between two dates or times'
}

# Create a DataFrame for comparison.
# The two tables are joined on their positional index only, so a Python type
# and the NumPy type on the same row are NOT meant to be equivalents.
# The outer join keeps the extra NumPy row and fills the Python columns
# of that row with NaN.
python_types_table = pd.DataFrame(
    list(python_data_types.items()),
    columns=['Python Data Types', 'Python Description'],
)
numpy_types_table = pd.DataFrame(
    list(numpy_data_types.items()),
    columns=['Numpy Data Types', 'Numpy Description'],
)
df = python_types_table.merge(
    numpy_types_table,
    left_index=True,
    right_index=True,
    how='outer'
)

df

In [None]:
# Creating numpy arrays with mixed data types including Python objects

# Array with integers and floats: the integers are upcast to float64
mixed_array_1 = np.array([1, 2.5, 3, 4.2])

# Array with integers, floats, and strings: every element becomes a
# fixed-width unicode string
mixed_array_2 = np.array([1, 2.5, 'three'])

# Array with integers and Python lists.
# The input is ragged (scalars next to a list), so dtype=object must be
# passed explicitly: NumPy >= 1.24 raises ValueError for ragged input
# without it, and 1.19-1.23 emit a VisibleDeprecationWarning.
mixed_array_3 = np.array([1, 2, [3, 4, 5]], dtype=object)

mixed_array_1, mixed_array_2, mixed_array_3

(array([1. , 2.5, 3. , 4.2]),
 array(['1', '2.5', 'three'], dtype='<U32'),
 array([1, 2, list([3, 4, 5])], dtype=object))

In [None]:
# Converting dtypes two ways: ndarray.astype() and np.asarray(..., dtype=...)

# "Static" cast via astype(): mixed_array_1 (floats) -> integers.
# The fractional part is dropped (2.5 -> 2, 4.2 -> 4).
static_cast_1 = mixed_array_1.astype(int)

# "Dynamic" casts via np.asarray() with an explicit target dtype:
#   - mixed_array_1 requested as float64
#   - mixed_array_3 requested as object
dynamic_cast_1 = np.asarray(mixed_array_1, dtype=np.float64)
dynamic_cast_2 = np.asarray(mixed_array_3, dtype=object)

(
    static_cast_1,
    dynamic_cast_1,
    dynamic_cast_2,
)

(array([1, 2, 3, 4]),
 array([1. , 2.5, 3. , 4.2]),
 array([1, 2, list([3, 4, 5])], dtype=object))

In [None]:
# Creating more numpy arrays with a mix of string and numeric literals.
# As soon as one element is a string, NumPy stores the whole array as
# fixed-width unicode text, so the numbers below end up as strings too.

# Array with integers and strings
mixed_array_4 = np.array([1, 'two', 3, 'four'])

# Array with floats and strings
mixed_array_5 = np.array([1.5, 'two point five', 3.5, 'four point five'])

# Array with a mix of integers, floats, and strings
mixed_array_6 = np.array([1, 2.5, 'three', 4, 5.5, 'six'])

# Static casting: Attempting to cast mixed_array_4 to float (will raise an error).
# Both result names are bound up front so the display line at the end of the
# cell works whether or not the cast raises; exactly one of them stays None.
static_cast_2 = None
static_cast_2_error = None
try:
    static_cast_2 = mixed_array_4.astype(float)
except ValueError as e:
    # 'two' cannot be parsed as a float, which surfaces as ValueError
    static_cast_2_error = str(e)

# Dynamic casting: Attempting to cast mixed_array_5 to object.
# The elements stay strings; only the container dtype changes.
dynamic_cast_3 = np.asarray(mixed_array_5, dtype=object)

mixed_array_4, mixed_array_5, mixed_array_6, static_cast_2_error, dynamic_cast_3

(array(['1', 'two', '3', 'four'], dtype='<U21'),
 array(['1.5', 'two point five', '3.5', 'four point five'], dtype='<U32'),
 array(['1', '2.5', 'three', '4', '5.5', 'six'], dtype='<U32'),
 "could not convert string to float: 'two'",
 array(['1.5', 'two point five', '3.5', 'four point five'], dtype=object))

In [None]:
# Numeric literals stored next to a dict: three object-dtype arrays
#   mixed_array_7 -> integers plus a dictionary
#   mixed_array_8 -> floats plus a dictionary
#   mixed_array_9 -> integers, floats, and a dictionary
mixed_array_7, mixed_array_8, mixed_array_9 = (
    np.array(items)
    for items in (
        [1, 2, 3, {'a': 1, 'b': 2}],
        [1.5, 2.5, 3.5, {'x': 10.5, 'y': 20.5}],
        [1, 2.5, 3, 4.5, {'key': 'value'}],
    )
)

(
    mixed_array_7,
    mixed_array_8,
    mixed_array_9,
)

(array([1, 2, 3, {'a': 1, 'b': 2}], dtype=object),
 array([1.5, 2.5, 3.5, {'x': 10.5, 'y': 20.5}], dtype=object),
 array([1, 2.5, 3, 4.5, {'key': 'value'}], dtype=object))

In [None]:
# Checking the types of the mixed arrays using dtype and type()
# (.dtype is the element type stored in the array, type() is the container class)
dtype_mixed_array_7 = mixed_array_7.dtype
type_mixed_array_7 = type(mixed_array_7)

dtype_mixed_array_8 = mixed_array_8.dtype
type_mixed_array_8 = type(mixed_array_8)

dtype_mixed_array_9 = mixed_array_9.dtype
type_mixed_array_9 = type(mixed_array_9)

# Static casting: Attempting to cast mixed_array_7 to string.
# Both result names are bound before the try block. The cast succeeds for this
# array, so a name assigned only inside `except` would never exist and the
# display line below would raise NameError.
static_cast_3 = None
static_cast_3_error = None
try:
    static_cast_3 = mixed_array_7.astype(str)
except Exception as e:
    # Deliberately broad: the conversion calls str() on arbitrary objects
    static_cast_3_error = str(e)

# Dynamic casting: Attempting to cast mixed_array_8 to object
dynamic_cast_4 = np.asarray(mixed_array_8, dtype=object)

dtype_mixed_array_7, type_mixed_array_7, dtype_mixed_array_8, type_mixed_array_8, dtype_mixed_array_9, type_mixed_array_9, static_cast_3, static_cast_3_error, dynamic_cast_4

NameError: name 'static_cast_3_error' is not defined

In [None]:
# Re-running the type check without the try/except wrapper

# Static casting of mixed_array_7 to string: every element, the dict
# included, is replaced by its string form.
static_cast_3 = mixed_array_7.astype(str)

(
    dtype_mixed_array_7,
    type_mixed_array_7,
    dtype_mixed_array_8,
    type_mixed_array_8,
    dtype_mixed_array_9,
    type_mixed_array_9,
    static_cast_3,
    dynamic_cast_4,
)

(dtype('O'),
 numpy.ndarray,
 dtype('O'),
 numpy.ndarray,
 dtype('O'),
 numpy.ndarray,
 array(['1', '2', '3', "{'a': 1, 'b': 2}"], dtype='<U16'),
 array([1.5, 2.5, 3.5, {'x': 10.5, 'y': 20.5}], dtype=object))

In [None]:
# Given dataset: one row per record -> [id, age, comment].
# dtype=object keeps every cell as the Python object it was written as.
# Without it NumPy coerces the whole table to a fixed-width string array,
# which cannot hold a real None for a missing age.
data = np.array([
    [101, '25', 'I love Python!'],
    [102, 30, 12345],
    [103, 'NA', 'Data wrangling is fun!'],
    [104, 28, 'Missing comment here'],
    [105, 'twenty', 67890]
], dtype=object)

# Step 1: Identify and handle missing or erroneous age data

def convert_age(age):
    """Return ``age`` as an int, or None when it cannot be converted.

    Args:
        age: Value to convert; anything accepted by ``int()`` is converted.

    Returns:
        The integer value, or None if ``int(age)`` raises ValueError
        (e.g. 'NA', 'twenty') or TypeError (e.g. None).
    """
    try:
        return int(age)
    except (TypeError, ValueError):
        return None

# Apply the function to the age column.
# otypes=[object] is required: without it np.vectorize infers the output
# dtype from the first result (an int) and then fails with TypeError as
# soon as convert_age returns None.
data[:, 1] = np.vectorize(convert_age, otypes=[object])(data[:, 1])

# Step 2: Convert all comments to strings
data[:, 2] = [str(comment) for comment in data[:, 2]]

# Cleaned dataset
data

TypeError: int() argument must be a string, a bytes-like object or a number, not 'NoneType'

In [None]:
# Modified approach to handle erroneous age data

# Convert valid ages to integers; anything convert_age cannot parse becomes None.
# dtype=object lets ints and None share one array even when every age is valid.
# No separate "replace non-integers" pass is needed: convert_age only ever
# returns an int or None. An isinstance(age, int) mask would also misfire on
# NumPy integer scalars, which are not Python ints.
ages = np.array([convert_age(age) for age in data[:, 1]], dtype=object)

# Update the age column in the dataset.
# NOTE: if `data` has a fixed-width string dtype, the assignment stores None
# as the text 'None'; `data` must have dtype=object to hold a real None.
data[:, 1] = ages

# Cleaned dataset
data

array([['101', '25', 'I love Python!'],
       ['102', '30', '12345'],
       ['103', 'None', 'Data wrangling is fun!'],
       ['104', '28', 'Missing comment here'],
       ['105', 'None', '67890']], dtype='<U22')

In [None]:
# Pure-Python alternative: clean the records without NumPy

# Raw records, each laid out as [id, age, comment]
data_list = [
    [101, '25', 'I love Python!'],
    [102, 30, 12345],
    [103, 'NA', 'Data wrangling is fun!'],
    [104, 28, 'Missing comment here'],
    [105, 'twenty', 67890]
]

# Build the cleaned rows: id is kept as is, age goes through convert_age
# (int or None), and the comment is forced to str.
cleaned_data_list = [
    [record_id, convert_age(raw_age), str(raw_comment)]
    for record_id, raw_age, raw_comment in data_list
]

cleaned_data_list

[[101, 25, 'I love Python!'],
 [102, 30, '12345'],
 [103, None, 'Data wrangling is fun!'],
 [104, 28, 'Missing comment here'],
 [105, None, '67890']]