type inference- Type inference refers to the automatic detection of the data type of an expression in a programming language. It is a feature present in some strongly statically typed languages.

In [1]:
import pandas as pd

In [2]:
df = pd.read_csv('Chapter6_data_UFO.csv')

In [3]:
df

Unnamed: 0,City,Colors Reported,Shape Reported,State,Time
0,Ithaca,,TRIANGLE,NY,6/1/1930 22:00
1,Willingboro,,OTHER,NJ,6/30/1930 20:00
2,Holyoke,,OVAL,CO,2/15/1931 14:00
3,Abilene,,DISK,KS,6/1/1931 13:00
4,New York Worlds Fair,,LIGHT,NY,4/18/1933 19:00
5,Valley City,,DISK,ND,9/15/1934 15:30
6,Crater Lake,,CIRCLE,CA,6/15/1935 0:00
7,Alma,,DISK,MI,7/15/1936 0:00
8,Eklutna,,CIGAR,AK,10/15/1936 17:00
9,Hubbard,,CYLINDER,OR,6/15/1937 0:00


In [4]:
pd.read_table('Chapter6_data_UFO.csv', sep=',')

Unnamed: 0,City,Colors Reported,Shape Reported,State,Time
0,Ithaca,,TRIANGLE,NY,6/1/1930 22:00
1,Willingboro,,OTHER,NJ,6/30/1930 20:00
2,Holyoke,,OVAL,CO,2/15/1931 14:00
3,Abilene,,DISK,KS,6/1/1931 13:00
4,New York Worlds Fair,,LIGHT,NY,4/18/1933 19:00
5,Valley City,,DISK,ND,9/15/1934 15:30
6,Crater Lake,,CIRCLE,CA,6/15/1935 0:00
7,Alma,,DISK,MI,7/15/1936 0:00
8,Eklutna,,CIGAR,AK,10/15/1936 17:00
9,Hubbard,,CYLINDER,OR,6/15/1937 0:00


In [5]:
df = pd.read_csv('Chapter6_data_UFO.csv', header=None)

In [6]:
df

Unnamed: 0,0,1,2,3,4
0,City,Colors Reported,Shape Reported,State,Time
1,Ithaca,,TRIANGLE,NY,6/1/1930 22:00
2,Willingboro,,OTHER,NJ,6/30/1930 20:00
3,Holyoke,,OVAL,CO,2/15/1931 14:00
4,Abilene,,DISK,KS,6/1/1931 13:00
5,New York Worlds Fair,,LIGHT,NY,4/18/1933 19:00
6,Valley City,,DISK,ND,9/15/1934 15:30
7,Crater Lake,,CIRCLE,CA,6/15/1935 0:00
8,Alma,,DISK,MI,7/15/1936 0:00
9,Eklutna,,CIGAR,AK,10/15/1936 17:00


### Change Names of Headers

In [7]:
df = pd.read_csv('Chapter6_data_UFO.csv', sep=',', names=['A','B','C','D','E'])

In [8]:
df.head()

Unnamed: 0,A,B,C,D,E
0,City,Colors Reported,Shape Reported,State,Time
1,Ithaca,,TRIANGLE,NY,6/1/1930 22:00
2,Willingboro,,OTHER,NJ,6/30/1930 20:00
3,Holyoke,,OVAL,CO,2/15/1931 14:00
4,Abilene,,DISK,KS,6/1/1931 13:00


In [9]:
df = pd.read_csv('Chapter6_data_UFO.csv', sep=',', names=['A','B','C','D','E'])

## What happens if there is no ',' to seperate values?

While you could do some munging by hand, the fields here are separated by a vari‐
able amount of whitespace. In these cases, you can pass a regular expression as a
delimiter for read_table . This can be expressed by the regular expression \s+

### NULL values


In [10]:
df.isnull().head()

Unnamed: 0,A,B,C,D,E
0,False,False,False,False,False
1,False,True,False,False,False
2,False,True,False,False,False
3,False,True,False,False,False
4,False,True,False,False,False


In [11]:
pd.options.display.max_rows = 10

In [12]:
df

Unnamed: 0,A,B,C,D,E
0,City,Colors Reported,Shape Reported,State,Time
1,Ithaca,,TRIANGLE,NY,6/1/1930 22:00
2,Willingboro,,OTHER,NJ,6/30/1930 20:00
3,Holyoke,,OVAL,CO,2/15/1931 14:00
4,Abilene,,DISK,KS,6/1/1931 13:00
...,...,...,...,...,...
18237,Grant Park,,TRIANGLE,IL,12/31/2000 23:00
18238,Spirit Lake,,DISK,IA,12/31/2000 23:00
18239,Eagle River,,,WI,12/31/2000 23:45
18240,Eagle River,RED,LIGHT,WI,12/31/2000 23:45


In [13]:
df_JSON = pd.read_json('Chapter6_JSONdata_jobArguments.json')

In [14]:
df_JSON

Unnamed: 0,returnCode,message,Payload
0,200,,"{'arg_name': 'eventSince', 'data_type': 'Text'..."
1,200,,"{'arg_name': 'start_date', 'data_type': 'DateT..."
2,200,,"{'arg_name': 'end_date', 'data_type': 'DateTim..."


In [15]:
df_JSON['Payload']

0    {'arg_name': 'eventSince', 'data_type': 'Text'...
1    {'arg_name': 'start_date', 'data_type': 'DateT...
2    {'arg_name': 'end_date', 'data_type': 'DateTim...
Name: Payload, dtype: object

In [18]:
df_JSON['Payload'][0]

{'arg_name': 'eventSince',
 'data_type': 'Text',
 'format': '',
 'default_value': '96',
 'container': 'text',
 'lookup': '',
 'readonly': True}

In [19]:
import json

In [22]:
obj = """

{
    "returnCode": "200",
    "message": "",
    "Payload": [{
            "arg_name": "eventSince",
            "data_type": "Text",
            "format": "",
            "default_value": "96",
            "container": "text",
            "lookup": "",
            "readonly": true
        },
        {
            "arg_name": "start_date",
            "data_type": "DateTime",
            "format": "yyyy-mm-dd hh:mm:ss",
            "default_value": "anyDefault",
            "container": "dateTime",
            "lookup": "",
            "readonly": false
        },
        {
            "arg_name": "end_date",
            "data_type": "DateTime",
            "format": "dd-mm-yyyy hh:mm:ss",
            "default_value": "",
            "container": "dateTime",
            "lookup": "",
            "readonly": false
        }
    ]
}

"""

In [24]:
results = json.loads(obj)

In [25]:
results['Payload']

[{'arg_name': 'eventSince',
  'data_type': 'Text',
  'format': '',
  'default_value': '96',
  'container': 'text',
  'lookup': '',
  'readonly': True},
 {'arg_name': 'start_date',
  'data_type': 'DateTime',
  'format': 'yyyy-mm-dd hh:mm:ss',
  'default_value': 'anyDefault',
  'container': 'dateTime',
  'lookup': '',
  'readonly': False},
 {'arg_name': 'end_date',
  'data_type': 'DateTime',
  'format': 'dd-mm-yyyy hh:mm:ss',
  'default_value': '',
  'container': 'dateTime',
  'lookup': '',
  'readonly': False}]

In [26]:
asjson = json.dumps(results)

In [27]:
asjson

'{"returnCode": "200", "message": "", "Payload": [{"arg_name": "eventSince", "data_type": "Text", "format": "", "default_value": "96", "container": "text", "lookup": "", "readonly": true}, {"arg_name": "start_date", "data_type": "DateTime", "format": "yyyy-mm-dd hh:mm:ss", "default_value": "anyDefault", "container": "dateTime", "lookup": "", "readonly": false}, {"arg_name": "end_date", "data_type": "DateTime", "format": "dd-mm-yyyy hh:mm:ss", "default_value": "", "container": "dateTime", "lookup": "", "readonly": false}]}'

In [28]:
payloads = pd.DataFrame(results['Payload'], columns=['readonly','container'])

In [29]:
payloads

Unnamed: 0,readonly,container
0,True,text
1,False,dateTime
2,False,dateTime


In [30]:
df_JSON.to_json()

'{"returnCode":{"0":200,"1":200,"2":200},"message":{"0":"","1":"","2":""},"Payload":{"0":{"arg_name":"eventSince","data_type":"Text","format":"","default_value":"96","container":"text","lookup":"","readonly":true},"1":{"arg_name":"start_date","data_type":"DateTime","format":"yyyy-mm-dd hh:mm:ss","default_value":"anyDefault","container":"dateTime","lookup":"","readonly":false},"2":{"arg_name":"end_date","data_type":"DateTime","format":"dd-mm-yyyy hh:mm:ss","default_value":"","container":"dateTime","lookup":"","readonly":false}}}'

In [31]:
df_JSON.to_json(orient='records')

'[{"returnCode":200,"message":"","Payload":{"arg_name":"eventSince","data_type":"Text","format":"","default_value":"96","container":"text","lookup":"","readonly":true}},{"returnCode":200,"message":"","Payload":{"arg_name":"start_date","data_type":"DateTime","format":"yyyy-mm-dd hh:mm:ss","default_value":"anyDefault","container":"dateTime","lookup":"","readonly":false}},{"returnCode":200,"message":"","Payload":{"arg_name":"end_date","data_type":"DateTime","format":"dd-mm-yyyy hh:mm:ss","default_value":"","container":"dateTime","lookup":"","readonly":false}}]'

In [32]:
conda install lxml

SyntaxError: invalid syntax (<ipython-input-32-f7512ca59e33>, line 1)