In [2]:
import pandas as pd
import numpy as np

In [3]:
rng = np.random.default_rng(42)

## Dataframes from NumPy data

In [4]:
df = pd.DataFrame(
    data=rng.integers(-10, 10, size=(2, 3)),
    columns=["First", "Second", "Third"],
    index=["a", "b"])
df

Unnamed: 0,First,Second,Third
a,-9,5,3
b,-2,-2,7


In [5]:
df.index

Index(['a', 'b'], dtype='object')

In [6]:
df.columns

Index(['First', 'Second', 'Third'], dtype='object')

In [7]:

df2 = pd.DataFrame(rng.random(size=(2, 3)), index=["A", "B"])
df2

Unnamed: 0,0,1,2
A,0.697368,0.094177,0.975622
B,0.76114,0.786064,0.128114


In [8]:
df2.columns  # Similar to Python's range type

RangeIndex(start=0, stop=3, step=1)

In [9]:
s1 = pd.Series([1, 2, 3])

In [10]:
df3 = pd.DataFrame(s1, columns=['a'])  # Giving the column name explicitly
df3

Unnamed: 0,a
0,1
1,2
2,3


In [11]:
s2 = pd.Series([4, 5, 6], name='b')
s2

0    4
1    5
2    6
Name: b, dtype: int64

In [12]:
pd.DataFrame(s2)

Unnamed: 0,b
0,4
1,5
2,6


In [13]:
pd.DataFrame({"a": s1, "b": s2})

Unnamed: 0,a,b
0,1,4
1,2,5
2,3,6


In [14]:
df=pd.DataFrame([{"Wage" : 1000, "Name" : "Jack", "Age" : 21}, {"Wage" : 1500, "Name" : "John", "Age" : 29}])
df

Unnamed: 0,Wage,Name,Age
0,1000,Jack,21
1,1500,John,29


In [16]:
df = pd.DataFrame([[1000, "Jack", 21], [1500, "John", 29]], columns=["Wage", "Name", "Age"])
df

Unnamed: 0,Wage,Name,Age
0,1000,Jack,21
1,1500,John,29


In [19]:
cols = ['Population', 'Total area']
idx = ['Helsinki', 'Espoo', 'Tampere', 'Vantaa', 'Oulu']
dat = [[643272, 715.48], [279044, 528.03], [231853, 689.59], [223027, 240.35], [201810, 3817.52]]

In [21]:
cities = pd.DataFrame(data=dat, columns=cols, index=idx)
cities

Unnamed: 0,Population,Total area
Helsinki,643272,715.48
Espoo,279044,528.03
Tampere,231853,689.59
Vantaa,223027,240.35
Oulu,201810,3817.52


In [22]:
dc = {'Helsinki': [643272, 715.48], 
      'Espoo': [279044, 528.03],
      'Tampere': [231853, 689.59], 
      'Vantaa': [223027, 240.35], 
      'Oulu': [201810, 3817.52]}

In [24]:
c = pd.DataFrame(dc, columns=cols)
c

Unnamed: 0,Population,Total area


# E4.2 (powers of series)
Make function powers_of_series that takes a Series and a positive integer k as parameters and returns a DataFrame. The resulting DataFrame should have the same index as the input Series. The first column of the dataFrame should be the input Series, the second column should contain the Series raised to power of two. The third column should contain the Series raised to the power of three, and so on until (and including) power of k. The columns should have indices from 1 to k.

In [44]:
def powers_of_series(series, k):
    data = np.column_stack([series**i for i in range(1, k + 1)])
    df = pd.DataFrame(data, index=series.index)
    df.columns = range(1, k+1)
    return df

In [47]:
ss = pd.Series([1,2,3,4], index=list("abcd"))
print(powers_of_series(ss, 3))

   1   2   3
a  1   1   1
b  2   4   8
c  3   9  27
d  4  16  64


### Accessing cols and rows of a df
 

In [48]:
df

Unnamed: 0,Wage,Name,Age
0,1000,Jack,21
1,1500,John,29


In [49]:
df["Wage"]

0    1000
1    1500
Name: Wage, dtype: int64

In [50]:
df[["Wage", "Name"]]

Unnamed: 0,Wage,Name
0,1000,Jack
1,1500,John


In [56]:
df[0:1]  # slice

Unnamed: 0,Wage,Name,Age
0,1000,Jack,21


In [57]:
df[df["Wage"] > 1200]   # boolean mask

Unnamed: 0,Wage,Name,Age
1,1500,John,29


In [59]:
df["Wage"][1]

np.int64(1500)

### 4.4 (municipalities of finland)
Load again the municipal information DataFrame. The rows of the DataFrame correspond to various geographical areas of Finland. The first row is about Finland as a whole, then rows from Akaa to Äänekoski are municipalities of Finland in alphabetical order. After that some larger regions are listed.

Write function municipalities_of_finland that returns a DataFrame containing only rows about municipalities. Give an appropriate argument for pd.read_csv so that it interprets the column about region name as the (row) index. This way you can index the DataFrame with the names of the regions.

In [None]:
fpath = "data/municipal.tsv"
findata = pd.read_csv(fpath, )