In [9]:
import pandas as pd

# Turn on the pandas "mode.copy_on_write" option via attribute-style access.
pd.options.mode.copy_on_write = True

# From...

## Lists

In [23]:
# A flat Python list becomes a single column; `columns` supplies its name.
numbers = list(range(1, 6))
df = pd.DataFrame(numbers, columns=["value"])
df

Unnamed: 0,value
0,1
1,2
2,3
3,4
4,5


## Series

When pandas builds a DataFrame from a dict of Series, it aligns them on their indexes:

- It uses the union of all indices as the index
- If a series doesn't have data for a particular index value, that cell will be filled with NaN (which is why the integer values are shown as floats in the output below)

In [10]:
# Two Series whose indexes only partially overlap ("2020-01-02" is shared).
series1 = pd.Series([1, 2], index=["2020-01-01", "2020-01-02"])
series2 = pd.Series([3, 4], index=["2020-01-02", "2020-01-03"])

# Each dict key becomes a column; rows are aligned on the union of both
# indexes, and labels missing from a Series are filled with NaN.
df = pd.DataFrame({"A": series1, "B": series2})

# End the cell with the bare frame (instead of print) so Jupyter renders it
# as a rich table, consistent with the other cells in this notebook.
df

              A    B
2020-01-01  1.0  NaN
2020-01-02  2.0  3.0
2020-01-03  NaN  4.0


## Dicts

In [11]:
## 1. Dict of lists/arrays
# Every key names a column; the list stored under it holds that column's values.

data = dict(
    name=["John", "Jane", "Bob"],
    age=[25, 30, 35],
    city=["NY", "SF", "LA"],
)
df1 = pd.DataFrame(data)
df1

Unnamed: 0,name,age,city
0,John,25,NY
1,Jane,30,SF
2,Bob,35,LA


In [12]:
## 2. List of dicts
# Every dict is one row; its keys become the column names.

data = [
    dict(name="John", age=25, city="NY"),
    dict(name="Jane", age=30, city="SF"),
    dict(name="Bob", age=35, city="LA"),
]
df2 = pd.DataFrame(data)
df2

Unnamed: 0,name,age,city
0,John,25,NY
1,Jane,30,SF
2,Bob,35,LA


In [13]:
## 3. Dict of dicts
# Outer dict keys become the index, inner dict keys become the columns.

data = {
    "001": {"name": "John", "age": 25, "city": "NY"},
    "002": {"name": "Jane", "age": 30, "city": "SF"},
    "003": {"name": "Bob", "age": 35, "city": "LA"},
}
# orient="index" treats each outer key as a row label. Unlike
# pd.DataFrame(data).T, which forces every column to object dtype, this keeps
# a per-column dtype (so "age" stays an integer column).
df3 = pd.DataFrame.from_dict(data, orient="index")
df3

Unnamed: 0,name,age,city
1,John,25,NY
2,Jane,30,SF
3,Bob,35,LA


# With...

## Series Elements

In [None]:
# Two small Series with partially overlapping labels, used by the cells below.
series1 = pd.Series({"2020-01-01": 1, "2020-01-02": 2})
series2 = pd.Series({"2020-01-02": 3, "2020-01-03": 4})

In [24]:
# One Series per row, all in the same column.
# Wrapping each pd.Series in its own list makes it a one-cell row, so the
# whole Series is stored in a single cell.
df = pd.DataFrame([[s] for s in (series1, series2)], columns=["value"])

df

Unnamed: 0,value
0,2020-01-01 1 2020-01-02 2 dtype: int64
1,2020-01-02 3 2020-01-03 4 dtype: int64


In [14]:
# One Series per column, sharing a single row.
# Each value is a one-element list, so the whole pd.Series lands in one cell.
df = pd.DataFrame(dict(A=[series1], B=[series2]))

df

Unnamed: 0,A,B
0,2020-01-01 1 2020-01-02 2 dtype: int64,2020-01-02 3 2020-01-03 4 dtype: int64


## DataFrame Elements

In [19]:
# Two ordinary frames that will be embedded as cell values.
df1 = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6], "C": [7, 8, 9]})
df2 = pd.DataFrame({"D": [10, 20, 30], "E": [40, 50, 60]})

# Wrap each pd.DataFrame in a one-element list so it occupies a single cell
# of a one-row frame.
df = pd.DataFrame(dict(Col1=[df1], Col2=[df2]))

df

Unnamed: 0,Col1,Col2
0,A B C 0 1 4 7 1 2 5 8 2 3 6 9,D E 0 10 40 1 20 50 2 30 60


In [22]:
# Pull the object stored in the first row of "Col1" back out of its cell.
df["Col1"].iat[0]

Unnamed: 0,A,B,C
0,1,4,7
1,2,5,8
2,3,6,9


## An Index

In [16]:
# Method 1 - Name the index at construction time by passing a named pd.Index
letter_grades = pd.Index(["a", "b", "c"], name="letter_grade")
df = pd.DataFrame({"value": [10, 20, 30]}, index=letter_grades)

df

Unnamed: 0_level_0,value
letter_grade,Unnamed: 1_level_1
a,10
b,20
c,30


In [17]:
# Method 2 - Build the frame with an unnamed index, then name it afterwards

df = pd.DataFrame(dict(value=[10, 20, 30]), index=list("abc"))

# Assigning to .name labels the existing index
df.index.name = "letter_grade"

df

Unnamed: 0_level_0,value
letter_grade,Unnamed: 1_level_1
a,10
b,20
c,30
