
Create a DataFrame from a NumPy array with custom column names.

In [1]:
import pandas as pd
import numpy as np

In [2]:
# Source matrix: the integers 1..9 laid out row by row as a 3x3 array.
numpy_array = np.arange(1, 10).reshape(3, 3)

# One label per array column, in left-to-right order.
column_names = [f'Column{i}' for i in range(1, 4)]

# Each array row becomes a DataFrame row; the default integer index is kept.
df = pd.DataFrame(numpy_array, columns=column_names)

print(df)


   Column1  Column2  Column3
0        1        2        3
1        4        5        6
2        7        8        9


Extract the date and time components from a DateTime column.

In [3]:
# The timestamps start out as plain strings.
data = {
    'DateTime': ['2012-01-01 08:30:00', '2012-01-02 12:45:00', '2012-01-03 18:15:00'],
}

# Parse the strings first, then derive the date and time parts through the
# .dt accessor. assign() evaluates its keyword arguments in order, so the
# Date and Time lambdas already see the parsed DateTime column.
df = pd.DataFrame(data).assign(
    DateTime=lambda frame: pd.to_datetime(frame['DateTime']),
    Date=lambda frame: frame['DateTime'].dt.date,
    Time=lambda frame: frame['DateTime'].dt.time,
)

print(df)

             DateTime        Date      Time
0 2012-01-01 08:30:00  2012-01-01  08:30:00
1 2012-01-02 12:45:00  2012-01-02  12:45:00
2 2012-01-03 18:15:00  2012-01-03  18:15:00


Resample time-series data in a DataFrame: aggregate daily values into weekly means.


In [4]:
# Ten consecutive calendar days, starting on 2012-01-01.
date_rng = pd.date_range(start='2012-01-01', periods=10, freq='D')

# One value per day: 10, 15, ..., 55.
data = {'Value': list(range(10, 60, 5))}

# Index the values by date; the weekly resampling further down works on this index.
df = pd.DataFrame(data, index=date_rng)

# Show the raw inputs (the index and the dict) rather than the frame itself.
print(date_rng, data, sep='\n')

DatetimeIndex(['2012-01-01', '2012-01-02', '2012-01-03', '2012-01-04',
               '2012-01-05', '2012-01-06', '2012-01-07', '2012-01-08',
               '2012-01-09', '2012-01-10'],
              dtype='datetime64[ns]', freq='D')
{'Value': [10, 15, 20, 25, 30, 35, 40, 45, 50, 55]}


In [5]:
# Group the daily rows into weekly bins ('W'), then average each bin.
weekly_bins = df.resample('W')
resampled_df = weekly_bins.mean()

# Print the label and the table in one call; sep="\n" puts the table on its own line.
print("Original DataFrame:", df, sep="\n")

# The leading "\n" in the label leaves a blank line between the two tables.
print("\nResampled DataFrame:", resampled_df, sep="\n")

Original DataFrame:
            Value
2012-01-01     10
2012-01-02     15
2012-01-03     20
2012-01-04     25
2012-01-05     30
2012-01-06     35
2012-01-07     40
2012-01-08     45
2012-01-09     50
2012-01-10     55

Resampled DataFrame:
            Value
2012-01-01   10.0
2012-01-08   30.0
2012-01-15   52.5


Perform a cross-tabulation between two columns in a DataFrame.

In [6]:
# Ten observations: a category label paired with a numeric value.
data = {
    'Category': ['A', 'B', 'A', 'B', 'C', 'A', 'C', 'C', 'B', 'A'],
    'Value': [10, 15, 20, 25, 30, 35, 40, 45, 50, 55],
}

df = pd.DataFrame(data)

# Count how often each (Category, Value) pair occurs: categories become the
# rows, values become the columns. Every Value appears exactly once in this
# data, so each column of the table contains a single 1.
row_labels = df['Category']
col_labels = df['Value']
cross_tab = pd.crosstab(index=row_labels, columns=col_labels)

print(cross_tab)


Value     10  15  20  25  30  35  40  45  50  55
Category                                        
A          1   0   1   0   0   1   0   0   0   1
B          0   1   0   1   0   0   0   0   1   0
C          0   0   0   0   1   0   1   1   0   0
