# Numpy
## 파일 입출력

In [3]:
import numpy as np
import matplotlib.pyplot as plt

### loadtxt()

In [2]:
# Show the signature and docstring of np.loadtxt.
help(np.loadtxt)

Help on function loadtxt in module numpy.lib.npyio:

loadtxt(fname, dtype=<class 'float'>, comments='#', delimiter=None, converters=None, skiprows=0, usecols=None, unpack=False, ndmin=0, encoding='bytes')
    Load data from a text file.
    
    Each row in the text file must have the same number of values.
    
    Parameters
    ----------
    fname : file, str, or pathlib.Path
        File, filename, or generator to read.  If the filename extension is
        ``.gz`` or ``.bz2``, the file is first decompressed. Note that
        generators should return byte strings for Python 3k.
    dtype : data-type, optional
        Data-type of the resulting array; default: float.  If this is a
        structured data-type, the resulting array will be 1-dimensional, and
        each row will be interpreted as an element of the array.  In this
        case, the number of columns used must match the number of fields in
        the data-type.
    comments : str or sequence of str, optional
       

In [3]:
# Read only the first four columns of iris.csv, skipping the header row.
feature_columns = (0, 1, 2, 3)
X = np.loadtxt(
    'iris.csv',
    delimiter=',',
    skiprows=1,
    usecols=feature_columns,
)
X.shape

(150, 4)

In [5]:
labels = ['Iris-setosa', 'Iris-versicolor', 'Iris-virginica']


def _species_code(raw):
    """Return the position in ``labels`` of the species name in ``raw``.

    Depending on the NumPy version and the ``encoding`` argument, a
    converter is handed either ``bytes`` or ``str``; both are accepted so
    the cell does not break when the default encoding differs.

    Raises ValueError if the name is not one of ``labels``.
    """
    name = raw.decode() if isinstance(raw, bytes) else raw
    return labels.index(name)


# Column 4 holds the species name; turn it into a numeric class index so the
# whole table fits in one float array.
iris = np.loadtxt('iris.csv', skiprows=1, delimiter=',',
                  converters={4: _species_code})
iris.shape

(150, 5)

- encoding : latin1, ascii, utf-8(파이썬 기본 인코딩), cp949(한글 윈도우 기본 인코딩) 등을 지정할 수 있다. encoding을 지정하면 converters 함수에 bytes가 아닌 str이 전달되므로 decode()가 필요 없다.

In [4]:
labels = ['Iris-setosa', 'Iris-versicolor', 'Iris-virginica']

# An explicit text encoding is passed, and the species name in column 4 is
# looked up in `labels` as-is (no decode step) to get its class index.
species_converters = {4: labels.index}
iris = np.loadtxt(
    'iris.csv',
    delimiter=',',
    skiprows=1,
    encoding='latin1',
    converters=species_converters,
)
iris.shape

(150, 5)

### genfromtxt()
- loadtxt()보다 세밀한 기능 제공 (예: 결측값 처리)

In [8]:
# Show the signature and docstring of np.genfromtxt.
help(np.genfromtxt)

Help on function genfromtxt in module numpy.lib.npyio:

genfromtxt(fname, dtype=<class 'float'>, comments='#', delimiter=None, skip_header=0, skip_footer=0, converters=None, missing_values=None, filling_values=None, usecols=None, names=None, excludelist=None, deletechars=None, replace_space='_', autostrip=False, case_sensitive=True, defaultfmt='f%i', unpack=None, usemask=False, loose=True, invalid_raise=True, max_rows=None, encoding='bytes')
    Load data from a text file, with missing values handled as specified.
    
    Each line past the first `skip_header` lines is split at the `delimiter`
    character, and characters following the `comments` character are discarded.
    
    Parameters
    ----------
    fname : file, str, pathlib.Path, list of str, generator
        File, filename, list, or generator to read.  If the filename
        extension is `.gz` or `.bz2`, the file is first decompressed. Note
        that generators must return byte strings in Python 3k.  The strings
   

In [15]:
labels = ['Iris-setosa', 'Iris-versicolor', 'Iris-virginica']


def _species_code(raw):
    """Return the index of the species name in ``labels`` as a float.

    Depending on the NumPy version and the ``encoding`` argument, a
    converter is handed either ``bytes`` or ``str``; both are accepted.
    The result is a float so the column matches the float dtype of the
    other columns.

    Raises ValueError if the name is not one of ``labels``.
    """
    name = raw.decode() if isinstance(raw, bytes) else raw
    return float(labels.index(name))


# Same load as with loadtxt, but through genfromtxt (note skip_header
# instead of skiprows).
iris = np.genfromtxt('iris.csv', skip_header=1, delimiter=',',
                     converters={4: _species_code})
iris.shape

(150, 5)

In [17]:
# Inspect the first row: four measurements followed by the encoded label.
iris[0]

array([5.1, 3.5, 1.4, 0.2, 0. ])

### savetxt()

In [18]:
# Show the signature and docstring of np.savetxt.
help(np.savetxt)

Help on function savetxt in module numpy.lib.npyio:

savetxt(fname, X, fmt='%.18e', delimiter=' ', newline='\n', header='', footer='', comments='# ', encoding=None)
    Save an array to a text file.
    
    Parameters
    ----------
    fname : filename or file handle
        If the filename ends in ``.gz``, the file is automatically saved in
        compressed gzip format.  `loadtxt` understands gzipped files
        transparently.
    X : 1D or 2D array_like
        Data to be saved to a text file.
    fmt : str or sequence of strs, optional
        A single format (%10.5f), a sequence of formats, or a
        multi-format string, e.g. 'Iteration %d -- %10.5f', in which
        case `delimiter` is ignored. For complex `X`, the legal options
        for `fmt` are:
    
        * a single specifier, `fmt='%.4e'`, resulting in numbers formatted
          like `' (%s+%sj)' % (fmt, fmt)`
        * a full string specifying every real and imaginary part, e.g.
          `' %.4e %+.4ej %.4e 

In [5]:
# The delimiter is non-ASCII, so the bytes written depend on the output
# encoding. State it explicitly (cp949, the encoding this notebook uses when
# reading iris2.csv back) instead of relying on a platform default.
np.savetxt('iris2.csv', iris, delimiter='김현호', fmt='%.2f',
           encoding='cp949')

In [10]:
# Load iris2.csv back, splitting on the same custom delimiter and decoding
# the file as cp949.
# NOTE(review): newer NumPy releases may only accept a single-character
# delimiter in loadtxt -- confirm against the installed version.
iris = np.loadtxt('iris2.csv', delimiter='김현호', encoding='cp949')
iris.shape

(150, 5)

### 기상 데이터 불러오기

In [28]:
# One comma-separated record, kept verbatim so that its field layout
# (including the empty fields) can be inspected.
s = '108,2015-01-01,-7.7,-9.8,745,-4.3,1500,,,,,,,12.1,270,510,6.5,320,1256,4.6,3999,-18.8,29,8,41.4,1.4,1013.6,1026.7,2216,1022.8,8,1024.9,9.6,8.9,1200,1.7,9.79,,,,,,.8,0,-4.1,-10.9,-3.4,-2,-.2,.7,3.4,5.6,8.3,15.5,17.3,,1.4,,{황사}0615-{황사}{강도0}0900-0955.,'

# Trim surrounding whitespace first, then break the record on commas.
trimmed_record = s.strip()
cols = trimmed_record.split(',')
cols

['108',
 '2015-01-01',
 '-7.7',
 '-9.8',
 '745',
 '-4.3',
 '1500',
 '',
 '',
 '',
 '',
 '',
 '',
 '12.1',
 '270',
 '510',
 '6.5',
 '320',
 '1256',
 '4.6',
 '3999',
 '-18.8',
 '29',
 '8',
 '41.4',
 '1.4',
 '1013.6',
 '1026.7',
 '2216',
 '1022.8',
 '8',
 '1024.9',
 '9.6',
 '8.9',
 '1200',
 '1.7',
 '9.79',
 '',
 '',
 '',
 '',
 '',
 '.8',
 '0',
 '-4.1',
 '-10.9',
 '-3.4',
 '-2',
 '-.2',
 '.7',
 '3.4',
 '5.6',
 '8.3',
 '15.5',
 '17.3',
 '',
 '1.4',
 '',
 '{황사}0615-{황사}{강도0}0900-0955.',
 '']

In [29]:
# Number of fields obtained from the split.
len(cols)

60

In [16]:
# List every entry of cols next to its zero-padded, three-digit index.
for position, entry in enumerate(cols):
    print(f'{position:03d} => {entry!s}')

000 => 지점
001 => 일시
002 => 평균기온(°C)
003 => 최저기온(°C)
004 => 최저기온 시각(hhmi)
005 => 최고기온(°C)
006 => 최고기온 시각(hhmi)
007 => 강수 계속시간(hr)
008 => 10분 최다 강수량(mm)
009 => 10분 최다강수량 시각(hhmi)
010 => 1시간 최다강수량(mm)
011 => 1시간 최다 강수량 시각(hhmi)
012 => 일강수량(mm)
013 => 최대 순간 풍속(m/s)
014 => 최대 순간 풍속 풍향(16방위)
015 => 최대 순간풍속 시각(hhmi)
016 => 최대 풍속(m/s)
017 => 최대 풍속 풍향(16방위)
018 => 최대 풍속 시각(hhmi)
019 => 평균 풍속(m/s)
020 => 풍정합(100m)
021 => 평균 이슬점온도(°C)
022 => 최소 상대습도(%)
023 => 최소 상대습도 시각(hhmi)
024 => 평균 상대습도(%)
025 => 평균 증기압(hPa)
026 => 평균 현지기압(hPa)
027 => 최고 해면기압(hPa)
028 => 최고 해면기압 시각(hhmi)
029 => 최저 해면기압(hPa)
030 => 최저 해면기압 시각(hhmi)
031 => 평균 해면기압(hPa)
032 => 가조시간(hr)
033 => 합계 일조 시간(hr)
034 => 1시간 최다일사 시각(hhmi)
035 => 1시간 최다일사량(MJ/m2)
036 => 합계 일사(MJ/m2)
037 => 일 최심신적설(cm)
038 => 일 최심신적설 시각(hhmi)
039 => 일 최심적설(cm)
040 => 일 최심적설 시각(hhmi)
041 => 합계 3시간 신적설(cm)
042 => 평균 전운량(1/10)
043 => 평균 중하층운량(1/10)
044 => 평균 지면온도(°C)
045 => 최저 초상온도(°C)
046 => 평균 5cm 지중온도(°C)
047 => 평균 10cm 지중온도(°C)
048 => 평균 20cm 지중온도(°C)
049

In [26]:
def _blank_as_zero(field):
    """Parse a numeric field, returning 0 when the field is empty."""
    # NOTE(review): an empty field becomes 0, which cannot be told apart
    # from a genuine reading of 0 -- confirm that this is acceptable.
    if field == '':
        return 0
    return float(field)


# Columns 2, 3 and 5 are read, and all three share the same converter.
selected_columns = [2, 3, 5]
np.loadtxt(
    '기상관측_서울_20181004141633.csv',
    skiprows=1,
    delimiter=',',
    usecols=selected_columns,
    converters={column: _blank_as_zero for column in selected_columns},
    encoding='cp949',
)

array([[-7.7, -9.8, -4.3],
       [-6. , -9. , -1.9],
       [-2.7, -9.2,  3.1],
       ...,
       [15.4, 13. , 19.7],
       [15.9, 10.3, 22. ],
       [17.3, 11.2, 24.2]])

In [33]:
# Collect the date of every data row as a [year, month, day] list of ints.
data = []

# The context manager closes the file even if a row fails to parse.
with open('기상관측_서울_20181004141633.csv', encoding='cp949') as f:
    f.readline()  # discard the header line

    for line in f:
        fields = line.strip().split(',')
        if fields == ['']:
            continue  # ignore blank lines, e.g. a trailing newline at EOF
        # Field 1 holds the date, written with '-' between its parts.
        data.append([int(part) for part in fields[1].split('-')])

data

[[2015, 1, 1],
 [2015, 1, 2],
 [2015, 1, 3],
 [2015, 1, 4],
 [2015, 1, 5],
 [2015, 1, 6],
 [2015, 1, 7],
 [2015, 1, 8],
 [2015, 1, 9],
 [2015, 1, 10],
 [2015, 1, 11],
 [2015, 1, 12],
 [2015, 1, 13],
 [2015, 1, 14],
 [2015, 1, 15],
 [2015, 1, 16],
 [2015, 1, 17],
 [2015, 1, 18],
 [2015, 1, 19],
 [2015, 1, 20],
 [2015, 1, 21],
 [2015, 1, 22],
 [2015, 1, 23],
 [2015, 1, 24],
 [2015, 1, 25],
 [2015, 1, 26],
 [2015, 1, 27],
 [2015, 1, 28],
 [2015, 1, 29],
 [2015, 1, 30],
 [2015, 1, 31],
 [2015, 2, 1],
 [2015, 2, 2],
 [2015, 2, 3],
 [2015, 2, 4],
 [2015, 2, 5],
 [2015, 2, 6],
 [2015, 2, 7],
 [2015, 2, 8],
 [2015, 2, 9],
 [2015, 2, 10],
 [2015, 2, 11],
 [2015, 2, 12],
 [2015, 2, 13],
 [2015, 2, 14],
 [2015, 2, 15],
 [2015, 2, 16],
 [2015, 2, 17],
 [2015, 2, 18],
 [2015, 2, 19],
 [2015, 2, 20],
 [2015, 2, 21],
 [2015, 2, 22],
 [2015, 2, 23],
 [2015, 2, 24],
 [2015, 2, 25],
 [2015, 2, 26],
 [2015, 2, 27],
 [2015, 2, 28],
 [2015, 3, 1],
 [2015, 3, 2],
 [2015, 3, 3],
 [2015, 3, 4],
 [2015, 3, 5],

### 어레이를 바이너리 파일로 저장하기

In [22]:
# Store the iris array in NumPy's binary .npy format.
np.save('iris.npy', iris)

In [23]:
# Read the array back from the .npy file and check its shape.
iris2 = np.load('iris.npy')
iris2.shape

(150, 5)

In [24]:
# Split the table into the first four columns and the last column, then
# store both in a single .npz archive under the keys 'X' and 'y'.
features, target = iris[:, :4], iris[:, 4]
np.savez('iris.npz', X=features, y=target)

In [25]:
# Open the .npz archive; its arrays are accessed by key.
# NOTE(review): the object returned for .npz files presumably keeps the file
# open until closed -- consider `with np.load(...) as arch:` or arch.close().
arch = np.load('iris.npz')

In [27]:
# Shapes of the two arrays stored under the keys 'X' and 'y'.
arch['X'].shape, arch['y'].shape

((150, 4), (150,))