In [1]:
import pandas as pd
import numpy as np


In [2]:
my_array = np.ones(3, dtype=([('foo', int), ('bar', float)]))
print(my_array['foo'])


[1 1 1]


In [3]:
my_array2 = my_array.view(np.recarray)
print(my_array2)


[(1, 1.) (1, 1.) (1, 1.)]


In [4]:
data = np.array([['', 'Col1', 'Col2'],
                 ['Row1', 1, 2],
                 ['Row2', 3, 4]
])

df = pd.DataFrame(data=data[1:,1:],
                  index=data[1:,0],
                  columns=data[0,1:]
)

df


Unnamed: 0,Col1,Col2
Row1,1,2
Row2,3,4


In [5]:
my_2darray = np.array([[1, 2, 3], [4, 5, 6]])
pd.DataFrame(my_2darray)


Unnamed: 0,0,1,2
0,1,2,3
1,4,5,6


In [6]:
my_dict = {1: ['1', '3'], 2: ['1', '2'], 3: ['2', '4']}
print(pd.DataFrame(my_dict))


   1  2  3
0  1  1  2
1  3  2  4


In [7]:
my_df = pd.DataFrame(data=[4, 5, 6, 7], index=range(4), columns=['A'])
pd.DataFrame(my_df)


Unnamed: 0,A
0,4
1,5
2,6
3,7


In [8]:
my_series = pd.Series({'Belgium': 'Brussels', 'India': 'New Delhi', 'United Kingdom': 'London', 'United States': 'Washington'})
pd.DataFrame(my_series)


Unnamed: 0,0
Belgium,Brussels
India,New Delhi
United Kingdom,London
United States,Washington


In [9]:
df = pd.DataFrame(np.array([[1, 2, 3], [4, 5, 6]]))
df.shape


(2, 3)

In [10]:
len(df)


2

In [11]:
df = pd.DataFrame({'A': [1, 4, 7], 'B': [2, 5, 8], 'C': [3, 6, 9]})
df


Unnamed: 0,A,B,C
0,1,2,3
1,4,5,6
2,7,8,9


In [12]:
df.iloc[0][0]


  df.iloc[0][0]


1

In [13]:
df.loc[0]['A']


1

In [14]:
df.at[0, 'A']


1

In [15]:
df.iat[0, 0]


1

In [16]:
df.iloc[0]


A    1
B    2
C    3
Name: 0, dtype: int64

In [17]:
df.loc[:, 'A']


0    1
1    4
2    7
Name: A, dtype: int64

In [18]:
df


Unnamed: 0,A,B,C
0,1,2,3
1,4,5,6
2,7,8,9


In [19]:
df.set_index('C')


Unnamed: 0_level_0,A,B
C,Unnamed: 1_level_1,Unnamed: 2_level_1
3,1,2
6,4,5
9,7,8


In [20]:
df = pd.DataFrame(data=np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]), index= [2, 'A', 4], columns=[48, 49, 50])

print(df.loc[2])
df.iloc[2]


48    1
49    2
50    3
Name: 2, dtype: int32


48    7
49    8
50    9
Name: 4, dtype: int32

In [21]:
df = pd.DataFrame(data=np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]), index=[2.5, 12.6, 4.8], columns=[48, 49, 50])
df.loc[2] = [11, 12, 13]
df


Unnamed: 0,48,49,50
2.5,1,2,3
12.6,4,5,6
4.8,7,8,9
2.0,11,12,13


In [22]:
df = pd.DataFrame(data=np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]), columns=['A', 'B', 'C'])
df['D'] = df.index
df


Unnamed: 0,A,B,C,D
0,1,2,3,0
1,4,5,6,1
2,7,8,9,2


In [23]:
df.loc[:, 4] = pd.Series(['5', '6', '7'], index=df.index)
df


Unnamed: 0,A,B,C,D,4
0,1,2,3,0,5
1,4,5,6,1,6
2,7,8,9,2,7


In [24]:
df_reset = df.reset_index(level=0, drop=True)
df_reset


Unnamed: 0,A,B,C,D,4
0,1,2,3,0,5
1,4,5,6,1,6
2,7,8,9,2,7


In [25]:
df = pd.DataFrame(data=np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9], [40, 50, 60], [23, 35, 37]]),
                  index=[2.5, 12.6, 4.8, 4.8, 2.5],
                  columns=[48, 49, 50])

df.reset_index().drop_duplicates(subset='index', keep='last').set_index('index')


Unnamed: 0_level_0,48,49,50
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
12.6,4,5,6
4.8,40,50,60
2.5,23,35,37


In [26]:
df = pd.DataFrame(data=np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]), columns=['A', 'B', 'C'])
df


Unnamed: 0,A,B,C
0,1,2,3
1,4,5,6
2,7,8,9


In [27]:
df.drop('A', axis=1, inplace=True)
df.drop(df.columns[[1]], axis=1)


Unnamed: 0,B
0,2
1,5
2,8


In [28]:
df = pd.DataFrame(data=np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9], [40, 50, 60], [23, 35, 37]]),
                  index=[2.5, 12.6, 4.8, 4.8, 2.5],
                  columns=[48, 49, 50])

df


Unnamed: 0,48,49,50
2.5,1,2,3
12.6,4,5,6
4.8,7,8,9
4.8,40,50,60
2.5,23,35,37


In [29]:
df.drop_duplicates([48], keep='last')


Unnamed: 0,48,49,50
2.5,1,2,3
12.6,4,5,6
4.8,7,8,9
4.8,40,50,60
2.5,23,35,37


In [30]:
df

df.drop(df.index[1])


Unnamed: 0,48,49,50
2.5,1,2,3
4.8,7,8,9
4.8,40,50,60
2.5,23,35,37


In [31]:
df = pd.DataFrame(data=np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]), columns=['A', 'B', 'C'])
df


Unnamed: 0,A,B,C
0,1,2,3
1,4,5,6
2,7,8,9


In [32]:
newcols = {
    'A': 'new_column_1',
    'B': 'new_column_2',
    'C': 'new_column_3'
}

df.rename(columns=newcols, inplace=True)
df.rename(index={1: 'a'})


Unnamed: 0,new_column_1,new_column_2,new_column_3
0,1,2,3
a,4,5,6
2,7,8,9


In [33]:
df = pd.DataFrame({'Student1': ['OK', 'Awful', 'Acceptable'],
                   'Student2': ['Perfect', 'Awful', 'OK'],
                   'Student3': ['Acceptable', 'Perfect', 'Poor']})

print(df)

df.replace(['Awful', 'Poor', 'OK', 'Acceptable', 'Perfect'], [0, 1, 2, 3, 4])


     Student1 Student2    Student3
0          OK  Perfect  Acceptable
1       Awful    Awful     Perfect
2  Acceptable       OK        Poor


Unnamed: 0,Student1,Student2,Student3
0,2,4,3
1,0,0,4
2,3,2,1


In [35]:
df = pd.DataFrame([['1\n', 2, '3\n'], [4, 5, '6\n'], [7, '8\n', 9]])
print(df)

df.replace({'\n': ''}, regex=True)


     0    1    2
0  1\n    2  3\n
1    4    5  6\n
2    7  8\n    9


Unnamed: 0,0,1,2
0,1,2,3
1,4,5,6
2,7,8,9


In [36]:
df = pd.DataFrame([['+-1aAbBcC', '2', '+-3aAbBcC'], ['4', '5', '+-6aAbBcC'], ['7', '+-8aAbBcC', '9']])
print(df)

df[0] = df[0].map(lambda x: x.lstrip('+-').rstrip('aAbBcC'))
df


           0          1          2
0  +-1aAbBcC          2  +-3aAbBcC
1          4          5  +-6aAbBcC
2          7  +-8aAbBcC          9


Unnamed: 0,0,1,2
0,1,2,+-3aAbBcC
1,4,5,+-6aAbBcC
2,7,+-8aAbBcC,9


In [37]:
df = pd.DataFrame({'Age': [34, 22, 19],
                   'PlusOne': [0, 0, 1],
                   'Ticket': ['23:44:55', '66:77:88', '43:68:05 56:34:12']})

print(df)

ticket_series = df['Ticket'].str.split(' ').apply(pd.Series, 1).stack()
ticket_series.index = ticket_series.index.droplevel(-1)
ticket_series


   Age  PlusOne             Ticket
0   34        0           23:44:55
1   22        0           66:77:88
2   19        1  43:68:05 56:34:12


  ticket_series = df['Ticket'].str.split(' ').apply(pd.Series, 1).stack()


0    23:44:55
1    66:77:88
2    43:68:05
2    56:34:12
dtype: object

In [38]:
doubler = lambda x: x * 2


In [40]:
df = pd.DataFrame(data=np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]), columns=['A', 'B', 'C'])
print(df)

df['A'].apply(doubler)


   A  B  C
0  1  2  3
1  4  5  6
2  7  8  9


0     2
1     8
2    14
Name: A, dtype: int64

In [41]:
df.loc[0].apply(doubler)


A    2
B    4
C    6
Name: 0, dtype: int64

In [42]:
doubled_df = df.applymap(doubler)
doubled_df


  doubled_df = df.applymap(doubler)


Unnamed: 0,A,B,C
0,2,4,6
1,8,10,12
2,14,16,18


In [43]:
def doubler(x):
    if x % 2 == 0:
        return x
    else:
        return x * 2


In [44]:
doubled_df = df.applymap(doubler)
doubled_df


  doubled_df = df.applymap(doubler)


Unnamed: 0,A,B,C
0,2,2,6
1,4,10,6
2,14,8,18


In [45]:
df = pd.DataFrame(np.nan, index=[0, 1, 2, 3], columns=['A'])
df


Unnamed: 0,A
0,
1,
2,
3,


In [46]:
df = pd.DataFrame(index=range(4), columns=['A'], dtype='float')
df


Unnamed: 0,A
0,
1,
2,
3,


In [47]:
# ERROR
#pd.read_csv('yourFile', parse_dates=True)
#pd.read_csv('yourFile', parse_dates=['columnName'])


In [49]:
dateparser = lambda x: pd.datetime.strptime(x, '%Y-%m-%d %H:%M:%S')

# ERROR
#pd.read_csv(infile, parse_dates=['columnName'], date_parser=dateparser)
#pd.read_csv(infile, parse_dates={'datetime': ['date', 'time']}, date_parser=dateparser)


In [50]:
products = pd.DataFrame({
    'category': ['Cleaning', 'Cleaning', 'Entertainment', 'Entertainment', 'Tech', 'Tech'],
    'store': ['Walmart', 'Dia', 'Walmart', 'Fnac', 'Dia', 'Walmart'],
    'price': [11.42, 23.50, 19.99, 15.95, 55.75, 111.55],
    'testscore': [4, 3, 5, 7, 5, 8]
})


In [51]:
pivot_products = products.pivot(index='category', columns='store', values='price')
pivot_products


store,Dia,Fnac,Walmart
category,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Cleaning,23.5,,11.42
Entertainment,,15.95,19.99
Tech,55.75,,111.55


In [52]:
pivot_products = products.pivot(index='category', columns='store')
pivot_products


Unnamed: 0_level_0,price,price,price,testscore,testscore,testscore
store,Dia,Fnac,Walmart,Dia,Fnac,Walmart
category,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
Cleaning,23.5,,11.42,3.0,,4.0
Entertainment,,15.95,19.99,,7.0,5.0
Tech,55.75,,111.55,5.0,,8.0


In [55]:
pivot_products = products.pivot_table(index='category', columns='store', values='price', aggfunc='mean')
pivot_products


store,Dia,Fnac,Walmart
category,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Cleaning,23.5,,11.42
Entertainment,,15.95,19.99
Tech,55.75,,111.55


In [56]:
people = pd.DataFrame({
    'FirstName': ['John', 'Jane'],
    'LastName': ['Doe', 'Austen'],
    'BloodType': ['A-', 'B+'],
    'Weight': [90, 64]
})

print(pd.melt(people, id_vars=['FirstName', 'LastName'], var_name='measurements'))


  FirstName LastName measurements value
0      John      Doe    BloodType    A-
1      Jane   Austen    BloodType    B+
2      John      Doe       Weight    90
3      Jane   Austen       Weight    64


In [57]:
df = pd.DataFrame(data=np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]), columns=['A', 'B', 'C'])

for index, row in df.iterrows():
    print(row['A'], row['B'])


1 2
4 5
7 8


In [58]:
df.to_csv('myDataFrame.csv')


In [60]:
df.to_csv('myDataFrame.csv', sep='\t')


In [61]:
df.to_csv('myDataFrame.csv', sep='\t', encoding='utf-8')


In [63]:
writer = pd.ExcelWriter('myDataFrame.xlsx')
df.to_excel(writer, 'DataFrame')

writer.close()
