# Data processing

![gif](imgs/DP002.gif)

## Imports

In [1]:
import pandas as pd
import numpy as np
import json
import csv
import sys

## Save files

### Classic: read a CSV and write it back to a file

In [2]:
# Parse the example CSV into a DataFrame; the bare name on the last line
# makes the notebook render the frame as this cell's output.
df = pd.read_csv(filepath_or_buffer='exs/ex5.csv')
df

Unnamed: 0,something,a,b,c,d,message
0,one,1,2,3.0,4,
1,two,5,6,,8,world
2,three,9,10,11.0,12,foo


In [3]:
# Persist the frame to disk as CSV, relying on to_csv's default settings.
df.to_csv(path_or_buf='exs/out.csv')

### sys.stdout

In [4]:
# Write to standard output instead of a file so the text appears in the
# cell output; fields are separated by '|' rather than a comma.
df.to_csv(path_or_buf=sys.stdout, sep='|')

|something|a|b|c|d|message
0|one|1|2|3.0|4|
1|two|5|6||8|world
2|three|9|10|11.0|12|foo


### Using another marker for NA values (`na_rep`)

In [5]:
# Missing values are written as empty strings by default; na_rep replaces
# them with the given marker text in the emitted CSV.
df.to_csv(path_or_buf=sys.stdout, na_rep='NULL')

,something,a,b,c,d,message
0,one,1,2,3.0,4,NULL
1,two,5,6,NULL,8,world
2,three,9,10,11.0,12,foo


### Omit the index and write only selected columns

In [6]:
# Leave out the row index and emit only columns a, b and c, in that order.
df.to_csv(path_or_buf=sys.stdout, columns=['a', 'b', 'c'], index=False)

a,b,c
1,2,3.0
5,6,
9,10,11.0


### Writing a Series to CSV

In [7]:
# Seven consecutive timestamps starting on 2000-01-01; the bare name on the
# last line displays the resulting DatetimeIndex.
dates = pd.date_range(start='1/1/2000', periods=7)
dates

DatetimeIndex(['2000-01-01', '2000-01-02', '2000-01-03', '2000-01-04',
               '2000-01-05', '2000-01-06', '2000-01-07'],
              dtype='datetime64[ns]', freq='D')

In [8]:
# Series holding the integers 0 through 6, labelled by the `dates` index;
# the bare name on the last line displays it.
s = pd.Series(data=np.arange(7), index=dates)
s

2000-01-01    0
2000-01-02    1
2000-01-03    2
2000-01-04    3
2000-01-05    4
2000-01-06    5
2000-01-07    6
Freq: D, dtype: int32

In [9]:
# Series expose the same to_csv writer as DataFrames; save this one to disk.
s.to_csv(path_or_buf='exs/outs.csv')

## JSON

In [10]:
# Raw JSON text used as the sample document for the JSON examples: one
# object with a string, an array of strings, a null, and an array of objects.
obj = """
{
    "name 1": "Wes",
    "places_lived": [ "United States", "Spain", "Germany"],
    "pet": null,
    "strings": [{"name": "Scott", "age": 25, "pet": "Zuko"},
    {"name": "Katie", "age": 33, "pet": "Cisco"}] 
}
"""

In [11]:
# Decode the JSON text in `obj` into plain Python objects (dict, list, str,
# int, None) and display the result.
j = json.loads(s=obj)
j

{'name 1': 'Wes',
 'places_lived': ['United States', 'Spain', 'Germany'],
 'pet': None,
 'strings': [{'name': 'Scott', 'age': 25, 'pet': 'Zuko'},
  {'name': 'Katie', 'age': 33, 'pet': 'Cisco'}]}