# Basic data structures in pandas

`Series`: a one-dimensional labeled array holding data of any type, such as integers, strings, Python objects, etc.\
`DataFrame`: a two-dimensional data structure that holds data like a two-dimensional array or a table with rows and columns.


In [9]:
import pandas as pd
import numpy as np

# A Series built from a plain list: no index is passed, so pandas supplies
# the default RangeIndex. np.nan stands in for a missing value.
s = pd.Series(data=[1, 3, 5, np.nan, 6, 8])
print(s)

# A DataFrame built from a NumPy array, with a DatetimeIndex produced by
# date_range() as the row labels and the letters A-D as the column labels.
dates = pd.date_range(start="20250512", periods=6)
print(dates)

# No seed is set in this cell, so the sampled values differ between runs.
df = pd.DataFrame(
    data=np.random.randn(len(dates), 4),
    index=dates,
    columns=["A", "B", "C", "D"],
)
df





0    1.0
1    3.0
2    5.0
3    NaN
4    6.0
5    8.0
dtype: float64
DatetimeIndex(['2025-05-12', '2025-05-13', '2025-05-14', '2025-05-15',
               '2025-05-16', '2025-05-17'],
              dtype='datetime64[ns]', freq='D')


Unnamed: 0,A,B,C,D
2025-05-12,-0.955227,-0.179285,0.568367,-1.053957
2025-05-13,0.570811,-0.297194,0.153778,-1.939956
2025-05-14,1.364293,-0.819797,-0.56981,-1.377245
2025-05-15,0.818688,0.168799,0.919991,-0.910814
2025-05-16,-0.566761,0.6178,0.956261,0.51419
2025-05-17,-1.14298,-0.247959,-0.04371,-1.277294


In [10]:
import pandas as pd
import numpy as np
# Build a DataFrame from a mapping: each key becomes a column label and each
# value supplies that column's data. Scalar entries (A, B, F) are repeated
# for every row; the row labels 0-3 come from the index of the Series in C.
df2 = pd.DataFrame(
    dict(
        A=1.0,
        B=pd.Timestamp("20130102"),
        C=pd.Series(1, index=[0, 1, 2, 3], dtype="float32"),
        D=np.full(4, 3, dtype="int32"),
        E=pd.Categorical(["test", "train", "test", "train"]),
        F="foo",
    )
)
df2

Unnamed: 0,A,B,C,D,E,F
0,1.0,2013-01-02,1.0,3,test,foo
1,1.0,2013-01-02,1.0,3,train,foo
2,1.0,2013-01-02,1.0,3,test,foo
3,1.0,2013-01-02,1.0,3,train,foo


# Viewing Data

In [None]:
# Show the last rows of the frame; n=5 is also what tail() uses when it is
# called without an argument.
df.tail(n=5)

Unnamed: 0,A,B,C,D
2025-05-13,0.570811,-0.297194,0.153778,-1.939956
2025-05-14,1.364293,-0.819797,-0.56981,-1.377245
2025-05-15,0.818688,0.168799,0.919991,-0.910814
2025-05-16,-0.566761,0.6178,0.956261,0.51419
2025-05-17,-1.14298,-0.247959,-0.04371,-1.277294


In [14]:
# Row labels of the frame (here the DatetimeIndex it was built with).
df.index

DatetimeIndex(['2025-05-12', '2025-05-13', '2025-05-14', '2025-05-15',
               '2025-05-16', '2025-05-17'],
              dtype='datetime64[ns]', freq='D')

In [15]:
# The underlying values as a NumPy array, without the index or column labels.
df.to_numpy()

array([[-0.95522687, -0.17928525,  0.5683672 , -1.05395718],
       [ 0.57081058, -0.29719363,  0.15377757, -1.93995562],
       [ 1.36429298, -0.81979676, -0.56981025, -1.37724537],
       [ 0.8186884 ,  0.1687987 ,  0.91999113, -0.91081381],
       [-0.56676111,  0.61779998,  0.95626117,  0.51419042],
       [-1.14297997, -0.24795901, -0.0437102 , -1.27729394]])

In [17]:
# Per-column summary statistics: count, mean, std, min, quartiles and max.
df.describe()

Unnamed: 0,A,B,C,D
count,6.0,6.0,6.0,6.0
mean,0.014804,-0.126273,0.330813,-1.007513
std,1.038859,0.483429,0.596235,0.825351
min,-1.14298,-0.819797,-0.56981,-1.939956
25%,-0.85811,-0.284885,0.005662,-1.352258
50%,0.002025,-0.213622,0.361072,-1.165626
75%,0.756719,0.081778,0.832085,-0.9466
max,1.364293,0.6178,0.956261,0.51419


In [18]:
# Transposed view: the column labels become the rows and the dates the columns.
df.T

Unnamed: 0,2025-05-12,2025-05-13,2025-05-14,2025-05-15,2025-05-16,2025-05-17
A,-0.955227,0.570811,1.364293,0.818688,-0.566761,-1.14298
B,-0.179285,-0.297194,-0.819797,0.168799,0.6178,-0.247959
C,0.568367,0.153778,-0.56981,0.919991,0.956261,-0.04371
D,-1.053957,-1.939956,-1.377245,-0.910814,0.51419,-1.277294
